From fd3c953d6975b7d2cd57d122454d47bbcb45db22 Mon Sep 17 00:00:00 2001 From: Ayush Tiwari Date: Fri, 3 Jul 2026 09:14:11 +0000 Subject: [PATCH v1] Add INSERT ... BY NAME to match source columns by name The SQL standard's forthcoming BY NAME option for INSERT allows the result columns of the source query to be matched to the target columns by name instead of by position. This is convenient when both sides are known by name but may be listed in a different order, for example: INSERT INTO t1 (c1, c2) BY NAME SELECT c1 * 10 AS c2, c2 + 5 AS c1 FROM t2; which assigns c2 + 5 to c1 and c1 * 10 to c2. The default positional matching can also be requested explicitly with BY POSITION. With BY NAME, each result column of the source query must match exactly one target column, and no two result columns may match the same target column. Target columns not named by the query receive their default values, just as when they are omitted from an explicit column list. BY NAME requires a query source with named columns and is rejected for VALUES and DEFAULT VALUES. Target column lists that use subfield or array assignments are also rejected, since matching by base column name would be ambiguous there. The BY NAME/BY POSITION clause can be written on either side of an OVERRIDING clause before the query source. BY POSITION is also accepted with DEFAULT VALUES as an explicit no-op. The matching is done during parse analysis by reordering the target column and attribute-number lists to line up with the source columns, so BY NAME is resolved into an ordinary positional column mapping in the query tree. As a result no changes to rule or view deparsing are needed. 
--- doc/src/sgml/ref/insert.sgml | 52 +++++++++ src/backend/parser/analyze.c | 119 +++++++++++++++++++ src/backend/parser/gram.y | 63 ++++++++++ src/include/nodes/parsenodes.h | 2 + src/test/regress/expected/insert.out | 169 +++++++++++++++++++++++++++ src/test/regress/sql/insert.sql | 98 ++++++++++++++++ 6 files changed, 503 insertions(+) diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml index 121a9edcb99..0fd40416a36 100644 --- a/doc/src/sgml/ref/insert.sgml +++ b/doc/src/sgml/ref/insert.sgml @@ -23,6 +23,7 @@ PostgreSQL documentation [ WITH [ RECURSIVE ] with_query [, ...] ] INSERT INTO table_name [ AS alias ] [ ( column_name [, ...] ) ] + [ BY { NAME | POSITION } ] [ OVERRIDING { SYSTEM | USER } VALUE ] { DEFAULT VALUES | VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } [ ON CONFLICT [ conflict_target ] conflict_action ] @@ -65,6 +66,34 @@ INSERT INTO table_name [ AS + + When the data source is a query, the optional + BY NAME clause changes how its result columns are + associated with the target columns: instead of being matched + left-to-right (BY POSITION, the default), each result + column of the query is matched to the target + column that has the same name. This is convenient when the source and + target columns are known by name but might be listed in a different order. + For example: + +INSERT INTO t1 (c1, c2) + BY NAME + SELECT c1 * 10 AS c2, c2 + 5 AS c1 FROM t2; + + assigns c2 + 5 to c1 and + c1 * 10 to c2. Every result column of + the query must match a target column, and no two + result columns may match the same target column. Target columns not named + by the query receive their default values, just as + if they had been omitted from the column list. BY NAME + cannot be used with VALUES or + DEFAULT VALUES, since those have no column names to match + against. BY POSITION is the default behavior; with + DEFAULT VALUES, it has no effect. 
+ BY NAME cannot be used when the target column list + assigns to subfields or array elements. + + Each column not present in the explicit or implicit column list will be filled with a default value, either its declared default value @@ -214,6 +243,29 @@ INSERT INTO table_name [ AS + + BY NAME + BY POSITION + + + Specifies how the result columns of the source + query are matched to the + target columns. With BY POSITION (the default) they + are matched left-to-right. With BY NAME each result + column is matched to the target column of the same name; the result + columns may then appear in any order, and target columns not named by + the query are filled with + their default values. BY NAME requires a + query source and cannot be + combined with VALUES or + DEFAULT VALUES. With + DEFAULT VALUES, BY POSITION has no + effect. BY NAME cannot be used when the target + column list assigns to subfields or array elements. When the data source is a query, this clause may be written either before or after an OVERRIDING clause. + + + + OVERRIDING SYSTEM VALUE diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 2932d17a107..863c3fa0242 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -76,6 +76,9 @@ post_parse_analyze_hook_type post_parse_analyze_hook = NULL; static Query *transformOptionalSelectInto(ParseState *pstate, Node *parseTree); static Query *transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt); static Query *transformInsertStmt(ParseState *pstate, InsertStmt *stmt); +static void transformInsertColsByName(ParseState *pstate, List *exprList, + List *srccolnames, List **icolumns, + List **attrnos); static OnConflictExpr *transformOnConflictClause(ParseState *pstate, OnConflictClause *onConflictClause); static ForPortionOfExpr *transformForPortionOfClause(ParseState *pstate, @@ -671,6 +674,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) List *sub_namespace; List *icolumns; List *attrnos; + List *srccolnames = NIL; ParseNamespaceItem *nsitem; RTEPermissionInfo *perminfo; ListCell *icols; @@ -719,6 
+723,25 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) selectStmt->lockingClause != NIL || selectStmt->withClause != NULL)); + /* + * INSERT ... BY NAME matches the source columns to the target columns by + * their names, so it requires a query source that produces named columns. + * VALUES rows and DEFAULT VALUES have no column names to match against. + */ + if (stmt->byName) + { + if (selectStmt == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use BY NAME with DEFAULT VALUES"), + errhint("BY NAME requires a query, such as a SELECT, as the data source."))); + if (selectStmt->valuesLists != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use BY NAME with VALUES"), + errhint("BY NAME requires a query, such as a SELECT, as the data source."))); + } + /* * If a non-nil rangetable/namespace was passed in, and we are doing * INSERT/SELECT, arrange to pass the rangetable/rteperminfos/namespace @@ -859,8 +882,21 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) expr = (Expr *) var; } exprList = lappend(exprList, expr); + if (stmt->byName) + srccolnames = lappend(srccolnames, tle->resname); } + /* + * For INSERT ... BY NAME, match the source columns to the target + * columns by name and reorder the target column and attribute-number + * lists so that they line up positionally with the source columns. + * Target columns not named by the source are dropped here and will be + * filled with their default values. + */ + if (stmt->byName) + transformInsertColsByName(pstate, exprList, srccolnames, + &icolumns, &attrnos); + /* Prepare row for assignment to target table */ exprList = transformInsertRow(pstate, exprList, stmt->cols, @@ -1206,6 +1242,89 @@ transformInsertRow(ParseState *pstate, List *exprlist, return result; } +/* + * transformInsertColsByName - + * Reorder the target-column lists of an INSERT ... BY NAME so that each + * entry lines up with the source column of the same name. 
+ * + * exprList: source expressions in source-column order (for error locations) + * srccolnames: source column names (list of char *), aligned with exprList + * icolumns: in/out; candidate target columns (list of ResTarget) + * attrnos: in/out; target attribute numbers, aligned with *icolumns + * + * On return, *icolumns and *attrnos point to new lists aligned positionally + * with exprList: the Nth entry is the target column whose name is equal + * (per strcmp) to the Nth source column name. Target columns matched by no + * source column are omitted, and will be assigned their default values later. + * Errors out if a target column uses indirection, if a source column matches + * no target column, or if two source columns match the same target column. + */ +static void +transformInsertColsByName(ParseState *pstate, List *exprList, + List *srccolnames, List **icolumns, List **attrnos) +{ + List *new_icolumns = NIL; + List *new_attrnos = NIL; + Bitmapset *matched = NULL; /* attnos already claimed by a source column */ + ListCell *icols; + ListCell *lc_expr; + ListCell *lc_name; + + foreach(icols, *icolumns) + { + ResTarget *col = lfirst_node(ResTarget, icols); + + if (col->indirection != NIL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use BY NAME with subfield or array assignments"), + parser_errposition(pstate, col->location))); + } + + forboth(lc_expr, exprList, lc_name, srccolnames) + { + Expr *sexpr = (Expr *) lfirst(lc_expr); + char *sname = (char *) lfirst(lc_name); + ResTarget *matchcol = NULL; + int matchattno = 0; + ListCell *attnos; + + forboth(icols, *icolumns, attnos, *attrnos) + { + ResTarget *col = lfirst_node(ResTarget, icols); + + if (sname != NULL && col->name != NULL && + strcmp(sname, col->name) == 0) + { + matchcol = col; + matchattno = lfirst_int(attnos); + break; /* stop at the first name match */ + } + } + + if (matchcol == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("source column \"%s\" has no matching target column", + sname ? 
sname : "?column?"), + parser_errposition(pstate, exprLocation((Node *) sexpr)))); + + if (bms_is_member(matchattno, matched)) /* target already claimed */ + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_COLUMN), + errmsg("column \"%s\" specified more than once", + matchcol->name), + parser_errposition(pstate, exprLocation((Node *) sexpr)))); + + matched = bms_add_member(matched, matchattno); + new_icolumns = lappend(new_icolumns, matchcol); + new_attrnos = lappend_int(new_attrnos, matchattno); + } + + *icolumns = new_icolumns; + *attrnos = new_attrnos; +} + /* * transformOnConflictClause - * transforms an OnConflictClause in an INSERT diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index ff4e1388c55..c0f181f04c6 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -650,6 +650,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type opt_existing_window_name %type opt_if_not_exists %type opt_unique_null_treatment +%type insert_by_clause %type generated_when override_kind opt_virtual_or_stored %type PartitionSpec OptPartitionSpec %type part_elem @@ -13039,6 +13040,29 @@ insert_rest: $$->override = $2; $$->selectStmt = $4; } + | OVERRIDING override_kind VALUE_P insert_by_clause SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = NIL; + $$->override = $2; + $$->byName = $4; + $$->selectStmt = $5; + } + | insert_by_clause SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = NIL; + $$->byName = $1; + $$->selectStmt = $2; + } + | insert_by_clause OVERRIDING override_kind VALUE_P SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = NIL; + $$->byName = $1; + $$->override = $3; + $$->selectStmt = $5; + } | '(' insert_column_list ')' SelectStmt { $$ = makeNode(InsertStmt); @@ -13052,12 +13076,42 @@ insert_rest: $$->override = $5; $$->selectStmt = $7; } + | '(' insert_column_list ')' OVERRIDING override_kind VALUE_P insert_by_clause SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = $2; + $$->override = $5; + 
$$->byName = $7; + $$->selectStmt = $8; + } + | '(' insert_column_list ')' insert_by_clause SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = $2; + $$->byName = $4; + $$->selectStmt = $5; + } + | '(' insert_column_list ')' insert_by_clause OVERRIDING override_kind VALUE_P SelectStmt + { + $$ = makeNode(InsertStmt); + $$->cols = $2; + $$->byName = $4; + $$->override = $6; + $$->selectStmt = $8; + } | DEFAULT VALUES { $$ = makeNode(InsertStmt); $$->cols = NIL; $$->selectStmt = NULL; } + | insert_by_clause DEFAULT VALUES + { + $$ = makeNode(InsertStmt); + $$->cols = NIL; + $$->byName = $1; + $$->selectStmt = NULL; + } ; override_kind: @@ -13065,6 +13119,15 @@ override_kind: | SYSTEM_P { $$ = OVERRIDING_SYSTEM_VALUE; } ; +/* + * Yields true for BY NAME (match source columns to target columns by name) + * and false for BY POSITION (the default positional matching, made explicit). + */ +insert_by_clause: + BY NAME_P { $$ = true; } + | BY POSITION { $$ = false; } + ; + insert_column_list: insert_column_item { $$ = list_make1($1); } diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4133c404a6b..951e8bd46da 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2213,6 +2213,8 @@ typedef struct InsertStmt ReturningClause *returningClause; /* RETURNING clause */ WithClause *withClause; /* WITH clause */ OverridingKind override; /* OVERRIDING clause */ + bool byName; /* true for BY NAME: match source columns to + * target columns by name, not by position */ } InsertStmt; /* ---------------------- diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index 75b8de79fce..09d6b4b4419 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -1096,3 +1096,172 @@ insert into returningwrtest values (2, 'foo') returning returningwrtest; (1 row) drop table returningwrtest; +-- ****** +-- * INSERT ... 
BY NAME +-- ****** +create table byname_t (a int, b int, c int default 42); +-- BY NAME matches the query's result columns to target columns by name, +-- regardless of order +insert into byname_t (a, b) by name select 2 as b, 1 as a; +select a, b, c from byname_t order by a; + a | b | c +---+---+---- + 1 | 2 | 42 +(1 row) + +-- Without an explicit column list, all table columns are candidates; columns +-- not named by the query receive their default values +truncate byname_t; +insert into byname_t by name select 10 as c, 20 as a; +select a, b, c from byname_t order by a; + a | b | c +----+---+---- + 20 | | 10 +(1 row) + +-- The same rule applies when an explicit column list is present +truncate byname_t; +insert into byname_t (a, b) by name select 30 as a; +select a, b, c from byname_t order by a; + a | b | c +----+---+---- + 30 | | 42 +(1 row) + +-- BY POSITION is the default, spelled out explicitly +truncate byname_t; +insert into byname_t (a, b, c) by position select 1, 2, 3; +select a, b, c from byname_t order by a; + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +truncate byname_t; +insert into byname_t by position values (4, 5, 6); +select a, b, c from byname_t order by a; + a | b | c +---+---+--- + 4 | 5 | 6 +(1 row) + +truncate byname_t; +insert into byname_t by position default values; +select a, b, c from byname_t order by a; + a | b | c +---+---+---- + | | 42 +(1 row) + +-- source column order does not matter with BY NAME +truncate byname_t; +insert into byname_t by name select 3 as c, 1 as a, 2 as b; +select a, b, c from byname_t order by a; + a | b | c +---+---+--- + 1 | 2 | 3 +(1 row) + +-- BY NAME works with a general query (CTE, expressions) +truncate byname_t; +with src(x, y) as (values (100, 200)) + insert into byname_t by name select y as b, x as a from src; +select a, b, c from byname_t order by a; + a | b | c +-----+-----+---- + 100 | 200 | 42 +(1 row) + +-- BY NAME is case sensitive and honors quoted identifiers +create table byname_case 
("Col" int, col int); +insert into byname_case by name select 1 as "Col", 2 as col; +select "Col", col from byname_case; + Col | col +-----+----- + 1 | 2 +(1 row) + +drop table byname_case; +-- error: a result column has no matching target column +insert into byname_t by name select 1 as a, 2 as zzz; +ERROR: source column "zzz" has no matching target column +LINE 1: insert into byname_t by name select 1 as a, 2 as zzz; + ^ +-- error: an explicit target list narrows the candidate columns +insert into byname_t (a, b) by name select 1 as a, 2 as c; +ERROR: source column "c" has no matching target column +LINE 1: insert into byname_t (a, b) by name select 1 as a, 2 as c; + ^ +-- error: two result columns map to the same target column +insert into byname_t by name select 1 as a, 2 as a; +ERROR: column "a" specified more than once +LINE 1: insert into byname_t by name select 1 as a, 2 as a; + ^ +-- error: generated result column names still need a matching target column +insert into byname_t by name select 1, 2; +ERROR: source column "?column?" has no matching target column +LINE 1: insert into byname_t by name select 1, 2; + ^ +-- error: target column indirection cannot be matched unambiguously by name +create type byname_pair as (x int, y int); +create table byname_comp (c byname_pair); +insert into byname_comp(c.x, c.y) by name select 1 as c; +ERROR: cannot use BY NAME with subfield or array assignments +LINE 1: insert into byname_comp(c.x, c.y) by name select 1 as c; + ^ +drop table byname_comp; +drop type byname_pair; +-- error: BY NAME requires a query source +insert into byname_t by name values (1, 2, 3); +ERROR: cannot use BY NAME with VALUES +HINT: BY NAME requires a query, such as a SELECT, as the data source. +insert into byname_t (a, b) by name values (1, 2); +ERROR: cannot use BY NAME with VALUES +HINT: BY NAME requires a query, such as a SELECT, as the data source. 
+insert into byname_t by name default values; +ERROR: cannot use BY NAME with DEFAULT VALUES +HINT: BY NAME requires a query, such as a SELECT, as the data source. +drop table byname_t; +-- BY NAME combined with OVERRIDING and an identity column +create table byname_ident (id int generated always as identity, + val int, note text default 'x'); +insert into byname_ident (id, val) by name overriding system value + select 99 as val, 5 as id; +insert into byname_ident (id, val) overriding system value by name + select 100 as val, 6 as id; +select id, val, note from byname_ident order by id; + id | val | note +----+-----+------ + 5 | 99 | x + 6 | 100 | x +(2 rows) + +drop table byname_ident; +-- BY NAME combined with ON CONFLICT and RETURNING +create table byname_conflict (id int primary key, a int, b int default 9); +insert into byname_conflict by name select 1 as id, 2 as a; +insert into byname_conflict by name select 1 as id, 99 as a + on conflict do nothing; +select id, a, b from byname_conflict order by id; + id | a | b +----+---+--- + 1 | 2 | 9 +(1 row) + +insert into byname_conflict by name select 5 as a, 1 as id + on conflict (id) do update set a = excluded.a; +insert into byname_conflict by name select 2 as id, 7 as a + returning id, a, b; + id | a | b +----+---+--- + 2 | 7 | 9 +(1 row) + +select id, a, b from byname_conflict order by id; + id | a | b +----+---+--- + 1 | 5 | 9 + 2 | 7 | 9 +(2 rows) + +drop table byname_conflict; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index 2b086eeb6d7..4b1cb9b49c7 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -674,3 +674,101 @@ alter table returningwrtest2 drop c; alter table returningwrtest attach partition returningwrtest2 for values in (2); insert into returningwrtest values (2, 'foo') returning returningwrtest; drop table returningwrtest; + +-- ****** +-- * INSERT ... 
BY NAME +-- ****** +create table byname_t (a int, b int, c int default 42); + +-- BY NAME matches the query's result columns to target columns by name, +-- regardless of order +insert into byname_t (a, b) by name select 2 as b, 1 as a; +select a, b, c from byname_t order by a; + +-- Without an explicit column list, all table columns are candidates; columns +-- not named by the query receive their default values +truncate byname_t; +insert into byname_t by name select 10 as c, 20 as a; +select a, b, c from byname_t order by a; + +-- The same rule applies when an explicit column list is present +truncate byname_t; +insert into byname_t (a, b) by name select 30 as a; +select a, b, c from byname_t order by a; + +-- BY POSITION is the default, spelled out explicitly +truncate byname_t; +insert into byname_t (a, b, c) by position select 1, 2, 3; +select a, b, c from byname_t order by a; +truncate byname_t; +insert into byname_t by position values (4, 5, 6); +select a, b, c from byname_t order by a; +truncate byname_t; +insert into byname_t by position default values; +select a, b, c from byname_t order by a; + +-- source column order does not matter with BY NAME +truncate byname_t; +insert into byname_t by name select 3 as c, 1 as a, 2 as b; +select a, b, c from byname_t order by a; + +-- BY NAME works with a general query (CTE, expressions) +truncate byname_t; +with src(x, y) as (values (100, 200)) + insert into byname_t by name select y as b, x as a from src; +select a, b, c from byname_t order by a; + +-- BY NAME is case sensitive and honors quoted identifiers +create table byname_case ("Col" int, col int); +insert into byname_case by name select 1 as "Col", 2 as col; +select "Col", col from byname_case; +drop table byname_case; + +-- error: a result column has no matching target column +insert into byname_t by name select 1 as a, 2 as zzz; + +-- error: an explicit target list narrows the candidate columns +insert into byname_t (a, b) by name select 1 as a, 2 as c; + 
+-- error: two result columns map to the same target column +insert into byname_t by name select 1 as a, 2 as a; + +-- error: generated result column names still need a matching target column +insert into byname_t by name select 1, 2; + +-- error: target column indirection cannot be matched unambiguously by name +create type byname_pair as (x int, y int); +create table byname_comp (c byname_pair); +insert into byname_comp(c.x, c.y) by name select 1 as c; +drop table byname_comp; +drop type byname_pair; + +-- error: BY NAME requires a query source +insert into byname_t by name values (1, 2, 3); +insert into byname_t (a, b) by name values (1, 2); +insert into byname_t by name default values; + +drop table byname_t; + +-- BY NAME combined with OVERRIDING and an identity column +create table byname_ident (id int generated always as identity, + val int, note text default 'x'); +insert into byname_ident (id, val) by name overriding system value + select 99 as val, 5 as id; +insert into byname_ident (id, val) overriding system value by name + select 100 as val, 6 as id; +select id, val, note from byname_ident order by id; +drop table byname_ident; + +-- BY NAME combined with ON CONFLICT and RETURNING +create table byname_conflict (id int primary key, a int, b int default 9); +insert into byname_conflict by name select 1 as id, 2 as a; +insert into byname_conflict by name select 1 as id, 99 as a + on conflict do nothing; +select id, a, b from byname_conflict order by id; +insert into byname_conflict by name select 5 as a, 1 as id + on conflict (id) do update set a = excluded.a; +insert into byname_conflict by name select 2 as id, 7 as a + returning id, a, b; +select id, a, b from byname_conflict order by id; +drop table byname_conflict; -- 2.43.0