From baf267b35bbfe0197a510ab9a5a3f01ee8c23c6c Mon Sep 17 00:00:00 2001 From: Matheus Alcantara Date: Mon, 29 Jun 2026 10:49:43 -0300 Subject: [PATCH v2] Add QUALIFY clause QUALIFY filters rows using the result of a window function without requiring the query to be wrapped in a subquery or CTE. It is to window functions what HAVING is to aggregates: a post-SELECT filter, evaluated after the window functions have been computed but before DISTINCT, ORDER BY, and LIMIT. The condition may contain window functions directly, whether or not they also appear in the select list, and it may reference select-list output names, which are not visible to WHERE or HAVING. The clause is implemented in the parser by rewriting a query specification that carries a QUALIFY clause into an equivalent two-level query: an inner subquery holding the table expression and select list, wrapped by an outer query whose WHERE clause is the QUALIFY condition. This follows the syntactic transformation described in the SQL standard change proposal. Because the planner already optimizes window functions that appear inside a subquery, the rewritten form obtains window run conditions and qual pushdown without any new planner code, producing plans identical to those of the equivalent hand-written subquery. The condition is transformed in the scope of the inner query, so a name that refers to a column of the FROM tables resolves to that column in preference to a select-list alias of the same name. Window functions, aggregates, and inner columns referenced by the condition are projected through the subquery and referenced from the outer WHERE clause by Var nodes, which preserves the comparison shape that the run-condition machinery recognizes. Because QUALIFY is evaluated after the select list, the ORDER BY, DISTINCT, and LIMIT clauses are processed against the outer query. QUALIFY is added as a reserved keyword, as required by the standard.
--- doc/src/sgml/ref/select.sgml | 99 +++ src/backend/nodes/nodeFuncs.c | 2 + src/backend/parser/analyze.c | 504 +++++++++++ src/backend/parser/gram.y | 16 +- src/backend/parser/parse_agg.c | 4 + src/backend/parser/parse_expr.c | 4 + src/backend/parser/parse_func.c | 3 + src/include/nodes/parsenodes.h | 1 + src/include/parser/kwlist.h | 1 + src/include/parser/parse_node.h | 1 + src/test/regress/expected/qualify.out | 1122 +++++++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/qualify.sql | 563 +++++++++++++ src/tools/pgindent/typedefs.list | 3 + 14 files changed, 2320 insertions(+), 5 deletions(-) create mode 100644 src/test/regress/expected/qualify.out create mode 100644 src/test/regress/sql/qualify.sql diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 09b6ce809bb..7387dba5549 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -40,6 +40,7 @@ SELECT [ ALL | DISTINCT [ ON ( expressiongrouping_element [, ...] } ] [ HAVING condition ] [ WINDOW window_name AS ( window_definition ) [, ...] ] + [ QUALIFY condition ] [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ] [ ORDER BY expression [ ASC | DESC | USING operator ] [ NULLS { FIRST | LAST } ] [, ...] ] [ LIMIT { count | ALL } ] @@ -146,6 +147,16 @@ TABLE [ ONLY ] table_name [ * ] + + + If the QUALIFY clause is specified, all rows + that do not satisfy the condition are eliminated from the output. + Because QUALIFY is applied after the output rows + have been computed, its condition can reference window functions + and output column names. (See below.) + + + SELECT DISTINCT eliminates duplicate rows from the result. 
SELECT DISTINCT ON eliminates rows that @@ -1242,6 +1253,83 @@ EXCLUDE NO OTHERS + + <literal>QUALIFY</literal> Clause + + + The optional QUALIFY clause has the general form + +QUALIFY condition + + where condition is + any expression that evaluates to a result of type + boolean, the same as specified for the + WHERE clause. + + + + QUALIFY eliminates rows that do not satisfy the + condition. It is applied after window functions have been computed, + so its condition may refer to the results of window functions. In + this respect QUALIFY is to window functions what + HAVING is to + aggregate functions: each filters the result of its corresponding + computation stage. Without QUALIFY, filtering on + the result of a window function requires wrapping the query in a + sub-query (or common table expression) and applying a + WHERE clause there, because window functions are + computed after WHERE and HAVING. + A row is retained only if the condition evaluates to true; rows for + which it evaluates to false or null are eliminated. + + + + The condition may contain window functions, whether or not the same + window functions also appear in the SELECT list. + These window functions may use an inline OVER + clause or refer to a window defined in the + WINDOW clause; a + WINDOW clause is not required. Because + QUALIFY is evaluated after the + SELECT list, the condition may also reference + output column names (column aliases defined in the + SELECT list), unlike WHERE and + HAVING. In case of ambiguity, a name in + QUALIFY is interpreted as an input-column name + rather than an output column name. + + + + QUALIFY does not require window functions to be + present; it is a general filter applied to the computed output rows. + A condition that does not involve a window function and refers only to + input columns produces the same result as if it had been written in + WHERE, though writing such conditions in + WHERE is preferred. 
Aggregate functions are + permitted in the condition under the same rules that apply to the + SELECT list, since aggregation has already occurred + by the time QUALIFY is evaluated. + + + + The following query returns, for each department, only the + highest-paid employees, filtering on the result of a window function + without a sub-query: + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS pos +FROM empsalary +QUALIFY pos = 1; + + Here the output column name pos defined in the + SELECT list is referenced in + QUALIFY. The window function could equally be + spelled out in the QUALIFY condition itself, in + which case it need not appear in the SELECT list at + all. + + + <literal>DISTINCT</literal> Clause @@ -2045,6 +2133,17 @@ FROM manufacturers m LEFT JOIN LATERAL get_product_names(m.id) pname ON true; missing features. + + The <literal>QUALIFY</literal> Clause + + + The QUALIFY clause conforms to the SQL standard + (feature O001, QUALIFY clause). Note that + QUALIFY is a reserved word; existing applications + that use it as an identifier must quote it or choose another name. 
+ + + Omitted <literal>FROM</literal> Clauses diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 2a2e00b372e..36cae30c0b3 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -4488,6 +4488,8 @@ raw_expression_tree_walker_impl(Node *node, return true; if (WALK(stmt->rarg)) return true; + if (WALK(stmt->qualifyClause)) + return true; } break; case T_PLAssignStmt: diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 2932d17a107..f17252f891e 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -52,6 +52,7 @@ #include "parser/parse_target.h" #include "parser/parse_type.h" #include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" #include "utils/backend_status.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -70,6 +71,31 @@ typedef struct SelectStmtPassthrough List *indirection; /* indirection yet to be applied to target */ } SelectStmtPassthrough; +/* State for the QUALIFY select-list alias resolution hook */ +typedef struct QualifyHookState +{ + List *targetList; /* target list to match aliases against */ + PostParseColumnRefHook prev_hook; /* previously installed hook, if any */ + void *prev_hook_state; /* its state */ +} QualifyHookState; + +/* Working state for qualifyExtractMutator */ +typedef struct QualifyExtractContext +{ + List **innerTargetList; /* inner query's target list (appended to) */ + int subquery_varno; /* varno of the subquery RTE in the outer + * query */ + int next_resno; /* next resno to assign to appended entries */ +} QualifyExtractContext; + +/* Working state for qualifyFixupVarattno */ +typedef struct QualifyVarmapContext +{ + int subquery_varno; /* varno whose Vars are to be remapped */ + AttrNumber *attmap; /* map from old resno to new resno */ +} QualifyVarmapContext; + + /* Hook for plugins to get control at end of parse analysis */ post_parse_analyze_hook_type post_parse_analyze_hook = NULL; @@ -108,6 +134,10 
@@ static Query *transformCallStmt(ParseState *pstate, CallStmt *stmt); static void transformLockingClause(ParseState *pstate, Query *qry, LockingClause *lc, bool pushedDown); +static Node *qualifyPostColumnRefHook(ParseState *pstate, ColumnRef *cref, + Node *var); +static Query *rewriteQualifyQuery(ParseState *pstate, Query *innerQuery, + Node *qualifyExpr); #ifdef DEBUG_NODE_TESTS_ENABLED static bool test_raw_expression_coverage(Node *node, void *context); #endif @@ -1724,6 +1754,427 @@ count_rowexpr_columns(ParseState *pstate, Node *expr) return -1; } +/* + * qualifyPostColumnRefHook + * Resolve a QUALIFY column reference to a select-list alias + * + * A QUALIFY column reference may name a table column or a select-list alias; + * a real column of the FROM tables takes precedence over an alias of the same + * name. NOTE(review): the saved prev_hook is never invoked here; confirm. + */ +static Node * +qualifyPostColumnRefHook(ParseState *pstate, ColumnRef *cref, Node *var) +{ + QualifyHookState *qstate = (QualifyHookState *) pstate->p_ref_hook_state; + char *name; + TargetEntry *matched = NULL; + ListCell *lc; + + /* A real column of the FROM tables takes precedence over an alias. */ + if (var != NULL) + return NULL; + + /* Only unqualified (single-field) names can match a select-list alias. 
*/ + if (list_length(cref->fields) != 1 || !IsA(linitial(cref->fields), String)) + return NULL; + + name = strVal(linitial(cref->fields)); + + foreach(lc, qstate->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk || tle->resname == NULL || strcmp(tle->resname, name) != 0) + continue; + + if (matched != NULL && !equal(matched->expr, tle->expr)) + ereport(ERROR, + (errcode(ERRCODE_AMBIGUOUS_COLUMN), + errmsg("column reference \"%s\" is ambiguous", name), + parser_errposition(pstate, cref->location))); + matched = tle; + } + + if (matched == NULL) + return NULL; + + /* + * XXX: an alias whose expression is non-deterministic would be evaluated + * once for the select-list column and again in the outer filter built + * from the QUALIFY condition, possibly producing different values. Until + * such references are resolved by the identity of the select-list entry + * (projecting it once and referencing it by a single Var), reject + * volatile alias expressions. + */ + if (contain_volatile_functions((Node *) matched->expr)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("QUALIFY cannot reference output column \"%s\" whose expression is non-deterministic", + name), + parser_errposition(pstate, cref->location))); + + return copyObject(matched->expr); +} + +/* + * qualifyExtractMutator + * Rewrite a QUALIFY condition so it can serve as the WHERE clause of the + * outer query built by rewriteQualifyQuery. + * + * Every window function, aggregate, and Var of the inner query is computed by + * the inner query and referenced from the outer query by a plain Var: we find + * an existing target entry with an equal expression, or append a new non-junk + * one, and replace the node with an outer Var. Comparisons and other + * operators are left in place at the outer level. 
This reproduces the shape + * of a hand-written subquery, so the planner's existing optimizations for + * window functions in subqueries (run conditions and qual pushdown) apply + * unchanged. + */ +static Node * +qualifyExtractMutator(Node *node, QualifyExtractContext *ctx) +{ + if (node == NULL) + return NULL; + + if (IsA(node, WindowFunc) || IsA(node, Aggref) + || (IsA(node, Var) && ((Var *) node)->varlevelsup == 0)) + { + TargetEntry *tle = NULL; + + /* Reuse a matching target entry so a value isn't projected twice. */ + foreach_ptr(TargetEntry, cur, *ctx->innerTargetList) + { + if (equal(cur->expr, node)) + { + tle = cur; + break; + } + } + + /* + * No matching entry exists, so append one to the inner query's target + * list so that the outer query can reference it. + */ + if (tle == NULL) + { + tle = makeTargetEntry((Expr *) copyObject(node), + ctx->next_resno++, + NULL, + false); + *ctx->innerTargetList = lappend(*ctx->innerTargetList, tle); + } + + /* + * The entry is referenced from the outer query, so it must be a real + * (non-resjunk) output column of the subquery. A reused entry might + * be a resjunk grouping column or a junk ORDER BY/DISTINCT target, so + * clear the flag here. + */ + tle->resjunk = false; + + return (Node *) makeVarFromTargetEntry(ctx->subquery_varno, tle); + } + + return expression_tree_mutator(node, qualifyExtractMutator, ctx); +} + +/* + * qualifyFixupVarattno + * Remap the attribute numbers of Vars referencing the QUALIFY subquery, + * after the subquery's target list has been renumbered. 
+ */ +static bool +qualifyFixupVarattno(Node *node, QualifyVarmapContext *ctx) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == 0 && var->varno == ctx->subquery_varno) + { + var->varattno = ctx->attmap[var->varattno]; + var->varattnosyn = var->varattno; + } + return false; + } + + return expression_tree_walker(node, qualifyFixupVarattno, ctx); +} + +/* + * rewriteQualifyQuery + * Implement the QUALIFY clause by wrapping the inner query in an outer + * query whose WHERE clause is the QUALIFY condition. + * + * 'innerQuery' is the fully-analyzed query for the table expression and select + * list, with the resolved QUALIFY condition appended to its target list as a + * resjunk entry. We remove that entry, decompose the condition into the outer + * WHERE clause (extracting window functions, aggregates, and inner Vars into + * the inner target list), and process the deferred ORDER BY, DISTINCT, and + * LIMIT clauses against the outer query. + */ +static Query * +rewriteQualifyQuery(ParseState *pstate, Query *innerQuery, Node *qualifyExpr) +{ + Query *outerQuery = makeNode(Query); + List *outerTargetList = NIL; + List *origTargetList = NIL; + List *nonjunk = NIL; + List *junk = NIL; + Bitmapset *liftrefs = NULL; + ParseState *opstate; + ParseNamespaceItem *nsitem; + QualifyExtractContext ctx; + Node *outerQual; + List *outerSortClause; + List *outerDistinctClause; + bool outerHasDistinctOn; + Node *outerLimitOffset; + Node *outerLimitCount; + LimitOption outerLimitOption; + ListCell *lc; + int resno; + int ntargets; + AttrNumber *attmap; + QualifyVarmapContext mapctx; + AttrNumber newresno; + + /* + * ORDER BY, DISTINCT, and LIMIT were transformed against the inner + * query's scope but are logically evaluated after QUALIFY, so they move + * to the outer query. 
Detach them from the inner query; the sort and + * distinct clauses are re-attached below after the columns they reference + * are exposed as subquery outputs. + */ + outerSortClause = innerQuery->sortClause; + outerDistinctClause = innerQuery->distinctClause; + outerHasDistinctOn = innerQuery->hasDistinctOn; + outerLimitOffset = innerQuery->limitOffset; + outerLimitCount = innerQuery->limitCount; + outerLimitOption = innerQuery->limitOption; + innerQuery->sortClause = NIL; + innerQuery->distinctClause = NIL; + innerQuery->hasDistinctOn = false; + innerQuery->limitOffset = NULL; + innerQuery->limitCount = NULL; + innerQuery->limitOption = LIMIT_OPTION_COUNT; + + /* Collect the sort/group refs that the lifted ORDER BY / DISTINCT use. */ + foreach(lc, outerSortClause) + liftrefs = bms_add_member(liftrefs, + ((SortGroupClause *) lfirst(lc))->tleSortGroupRef); + foreach(lc, outerDistinctClause) + liftrefs = bms_add_member(liftrefs, + ((SortGroupClause *) lfirst(lc))->tleSortGroupRef); + + /* + * Remove the temporary resjunk QUALIFY entry, which was only needed by + * parseCheckAggregates, from the inner target list. + */ + foreach(lc, innerQuery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->expr == (Expr *) qualifyExpr) + { + Assert(tle->resjunk); + innerQuery->targetList = foreach_delete_current(innerQuery->targetList, lc); + break; + } + } + + /* + * Capture the original select-list output columns now, before extraction + * and lifting may append further (non-resjunk) columns or un-junk + * grouping/sort columns; these are what the outer query projects. + */ + foreach_ptr(TargetEntry, tle, innerQuery->targetList) + { + if (!tle->resjunk) + origTargetList = lappend(origTargetList, tle); + } + + /* + * Extract the QUALIFY condition into the outer WHERE clause, projecting + * the inner-query expressions it needs through the subquery. The subquery + * will be range-table entry 1 of the outer query. 
+ */ + ctx.innerTargetList = &innerQuery->targetList; + ctx.subquery_varno = 1; + ctx.next_resno = list_length(innerQuery->targetList) + 1; + outerQual = qualifyExtractMutator(qualifyExpr, &ctx); + + /* + * Expose as non-resjunk subquery outputs the inner columns that the + * lifted ORDER BY / DISTINCT clauses reference, so the outer query can + * refer to them. Grouping columns not referenced by ORDER BY/DISTINCT + * stay resjunk. + */ + foreach_ptr(TargetEntry, tle, innerQuery->targetList) + { + if (tle->ressortgroupref != 0 && bms_is_member(tle->ressortgroupref, liftrefs)) + tle->resjunk = false; + } + + /* + * A subquery range table entry requires its non-resjunk columns to come + * first with contiguous resnos, and every output column must have a name. + * Reorder the inner target list accordingly, name any anonymous output + * columns, and remap the resnos referenced by the outer WHERE clause. + */ + ntargets = list_length(innerQuery->targetList); + attmap = (AttrNumber *) palloc0((ntargets + 1) * sizeof(AttrNumber)); + + foreach(lc, innerQuery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + junk = lappend(junk, tle); + else + nonjunk = lappend(nonjunk, tle); + } + innerQuery->targetList = list_concat(nonjunk, junk); + + newresno = 1; + foreach(lc, innerQuery->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + attmap[tle->resno] = newresno; + tle->resno = newresno; + + /* + * Output columns created by extraction are anonymous. Give a Var the + * name of its source column, so the lifted ORDER BY/DISTINCT and the + * outer WHERE can reference an input column (e.g. "QUALIFY x > 0 + * ORDER BY x"); fall back to a generated name for non-Var + * expressions. 
+ */ + if (!tle->resjunk && tle->resname == NULL) + { + if (IsA(tle->expr, Var) && ((Var *) tle->expr)->varattno > 0) + { + Var *var = (Var *) tle->expr; + + tle->resname = get_rte_attribute_name(rt_fetch(var->varno, + innerQuery->rtable), + var->varattno); + } + if (tle->resname == NULL) + tle->resname = psprintf("qualify%d", newresno); + } + newresno++; + } + + /* Now fix the QUALIFY Var references */ + mapctx.subquery_varno = ctx.subquery_varno; + mapctx.attmap = attmap; + qualifyFixupVarattno(outerQual, &mapctx); + + /* + * The inner query is now one level deeper than it was, so bump the level + * of any outer references it contains. The outer WHERE clause (and the + * lifted clauses) stay at the current level. + */ + IncrementVarSublevelsUp((Node *) innerQuery, 1, 1); + + /* Build the outer query's parse state, sharing the original parent. */ + opstate = make_parsestate(pstate->parentParseState); + opstate->p_sourcetext = pstate->p_sourcetext; + opstate->p_pre_columnref_hook = pstate->p_pre_columnref_hook; + opstate->p_post_columnref_hook = pstate->p_post_columnref_hook; + opstate->p_paramref_hook = pstate->p_paramref_hook; + opstate->p_coerce_param_hook = pstate->p_coerce_param_hook; + opstate->p_ref_hook_state = pstate->p_ref_hook_state; + + outerQuery->commandType = CMD_SELECT; + + nsitem = addRangeTableEntryForSubquery(opstate, innerQuery, + makeAlias("pg_qualify", NIL), + false, true); + addNSItemToQuery(opstate, nsitem, true, true, true); + + /* Outer target list: project the original select-list output columns. */ + resno = 1; + foreach(lc, origTargetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + Var *var = makeVarFromTargetEntry(ctx.subquery_varno, tle); + + outerTargetList = lappend(outerTargetList, + makeTargetEntry((Expr *) var, + resno++, + tle->resname ? pstrdup(tle->resname) : NULL, + false)); + } + + /* + * Re-attach the lifted ORDER BY / DISTINCT clauses. 
For each inner + * column they reference, give the outer target that projects it the same + * sort/group ref so the moved SortGroupClauses line up; a referenced + * column not in the projection gets a resjunk outer target. + */ + foreach_ptr(TargetEntry, innerTle, innerQuery->targetList) + { + TargetEntry *out = NULL; + + if (innerTle->ressortgroupref == 0 || !bms_is_member(innerTle->ressortgroupref, liftrefs)) + continue; + + foreach_ptr(TargetEntry, outerTle, outerTargetList) + { + + if (IsA(outerTle->expr, Var) && ((Var *) outerTle->expr)->varattno == innerTle->resno) + { + out = outerTle; + break; + } + } + + if (out == NULL) + { + Var *var = makeVarFromTargetEntry(ctx.subquery_varno, innerTle); + + out = makeTargetEntry((Expr *) var, + resno++, + innerTle->resname ? pstrdup(innerTle->resname) : NULL, + true); + outerTargetList = lappend(outerTargetList, out); + } + out->ressortgroupref = innerTle->ressortgroupref; + } + + outerQuery->targetList = outerTargetList; + outerQuery->rtable = opstate->p_rtable; + outerQuery->rteperminfos = opstate->p_rteperminfos; + outerQuery->jointree = makeFromExpr(opstate->p_joinlist, outerQual); + + outerQuery->sortClause = outerSortClause; + outerQuery->distinctClause = outerDistinctClause; + outerQuery->hasDistinctOn = outerHasDistinctOn; + outerQuery->limitOffset = outerLimitOffset; + outerQuery->limitCount = outerLimitCount; + outerQuery->limitOption = outerLimitOption; + + /* + * The outer query has no aggregates or window functions (those were + * extracted into the subquery), so only sub-links need to be reported. 
+ */ + outerQuery->hasSubLinks = checkExprHasSubLink(outerQual) || + checkExprHasSubLink(outerLimitOffset) || + checkExprHasSubLink(outerLimitCount); + + assign_query_collations(opstate, outerQuery); + + free_parsestate(opstate); + + return outerQuery; +} + /* * transformSelectStmt - @@ -1746,6 +2197,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt, Query *qry = makeNode(Query); Node *qual; ListCell *l; + bool hasQualify = (stmt->qualifyClause != NULL); + Node *qualifyExpr = NULL; qry->commandType = CMD_SELECT; @@ -1802,6 +2255,13 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt, * transformGroupClause and transformDistinctClause need the results. Note * that these functions can also change the targetList, so it's passed to * them by reference. + * + * When a QUALIFY clause is present, ORDER BY/DISTINCT/LIMIT are logically + * evaluated after QUALIFY and so belong to the outer query that + * rewriteQualifyQuery() builds. We still transform them here, against + * the inner query's scope, so that they can reference input columns and + * select-list aliases as in any other query; rewriteQualifyQuery() then + * lifts the resulting clauses out to the outer query. */ qry->sortClause = transformSortClause(pstate, stmt->sortClause, @@ -1853,6 +2313,42 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt, stmt->limitOption); qry->limitOption = stmt->limitOption; + /* + * Transform the QUALIFY condition, if any. This must happen before + * transformWindowDefinitions() so that window functions appearing only in + * QUALIFY get their inline window specifications registered and turned + * into WindowClauses. QUALIFY may reference select-list aliases, so we + * install a post-columnref hook that resolves a bare name to a + * select-list alias. 
+ * + * The resolved condition is appended to the target list as a resjunk + * entry so that parseCheckAggregates() validates it like a select-list + * item; it is removed and decomposed into the outer query's WHERE clause + * by rewriteQualifyQuery() below. + */ + if (hasQualify) + { + QualifyHookState qstate; + + qstate.targetList = qry->targetList; + qstate.prev_hook = pstate->p_post_columnref_hook; + qstate.prev_hook_state = pstate->p_ref_hook_state; + pstate->p_post_columnref_hook = qualifyPostColumnRefHook; + pstate->p_ref_hook_state = &qstate; + + qualifyExpr = transformWhereClause(pstate, stmt->qualifyClause, + EXPR_KIND_QUALIFY, "QUALIFY"); + + pstate->p_post_columnref_hook = qstate.prev_hook; + pstate->p_ref_hook_state = qstate.prev_hook_state; + + qry->targetList = lappend(qry->targetList, + makeTargetEntry((Expr *) qualifyExpr, + list_length(qry->targetList) + 1, + NULL, + true)); + } + /* transform window clauses after we have seen all window functions */ qry->windowClause = transformWindowDefinitions(pstate, pstate->p_windowdefs, @@ -1883,6 +2379,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt, if (pstate->p_hasAggs || qry->groupClause || qry->groupingSets || qry->havingQual) parseCheckAggregates(pstate, qry); + /* + * If a QUALIFY clause was present, wrap the query just built (the inner + * query) in an outer query whose WHERE clause is the QUALIFY condition, + * and process the deferred ORDER BY/DISTINCT/LIMIT clauses against it. 
+ */ + if (hasQualify) + return rewriteQualifyQuery(pstate, qry, qualifyExpr); + return qry; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index ff4e1388c55..942beb1ecb6 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -532,7 +532,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type TableElement TypedTableElement ConstraintElem DomainConstraintElem TableFuncElement %type columnDef columnOptions optionalPeriodName %type def_elem reloption_elem old_aggr_elem operator_def_elem -%type def_arg columnElem where_clause where_or_current_clause +%type def_arg columnElem where_clause qualify_clause where_or_current_clause a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound columnref having_clause func_table xmltable array_expr OptWhereClause operator_def_arg @@ -806,7 +806,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROCEDURES PROGRAM PROPERTIES PROPERTY PUBLICATION - QUOTE QUOTES + QUALIFY QUOTE QUOTES RANGE READ REAL REASSIGN RECURSIVE REF_P REFERENCES REFERENCING REFRESH REINDEX RELATIONSHIP RELATIVE_P RELEASE RENAME REPACK REPEATABLE REPLACE REPLICA @@ -13741,7 +13741,7 @@ select_clause: simple_select: SELECT opt_all_clause opt_target_list into_clause from_clause where_clause - group_clause having_clause window_clause + group_clause having_clause window_clause qualify_clause { SelectStmt *n = makeNode(SelectStmt); @@ -13754,11 +13754,12 @@ simple_select: n->groupByAll = ($7)->all; n->havingClause = $8; n->windowClause = $9; + n->qualifyClause = $10; $$ = (Node *) n; } | SELECT distinct_clause target_list into_clause from_clause where_clause - group_clause having_clause window_clause + group_clause having_clause window_clause qualify_clause { SelectStmt *n = makeNode(SelectStmt); @@ -13772,6 +13773,7 @@ simple_select: n->groupByAll = 
($7)->all; n->havingClause = $8; n->windowClause = $9; + n->qualifyClause = $10; $$ = (Node *) n; } | values_clause { $$ = $1; } @@ -14959,6 +14961,11 @@ where_clause: | /*EMPTY*/ { $$ = NULL; } ; +qualify_clause: + QUALIFY a_expr { $$ = $2; } + | /*EMPTY*/ { $$ = NULL; } + ; + /* variant for UPDATE and DELETE */ where_or_current_clause: WHERE a_expr { $$ = $2; } @@ -19346,6 +19353,7 @@ reserved_keyword: | ORDER | PLACING | PRIMARY + | QUALIFY | REFERENCES | RETURNING | SELECT diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index acb933392de..8153b074e55 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -407,6 +407,9 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) case EXPR_KIND_WHERE: errkind = true; break; + case EXPR_KIND_QUALIFY: + /* okay */ + break; case EXPR_KIND_POLICY: if (isAgg) err = _("aggregate functions are not allowed in policy expressions"); @@ -964,6 +967,7 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, err = _("window functions are not allowed in window definitions"); break; case EXPR_KIND_SELECT_TARGET: + case EXPR_KIND_QUALIFY: /* okay */ break; case EXPR_KIND_INSERT_TARGET: diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 9adc9d4c0f6..7633e32f915 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -579,6 +579,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) case EXPR_KIND_GENERATED_COLUMN: case EXPR_KIND_CYCLE_MARK: case EXPR_KIND_PROPGRAPH_PROPERTY: + case EXPR_KIND_QUALIFY: /* okay */ break; @@ -1820,6 +1821,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink) case EXPR_KIND_FROM_SUBSELECT: case EXPR_KIND_FROM_FUNCTION: case EXPR_KIND_WHERE: + case EXPR_KIND_QUALIFY: case EXPR_KIND_POLICY: case EXPR_KIND_HAVING: case EXPR_KIND_FILTER: @@ -3180,6 +3182,8 @@ ParseExprKindName(ParseExprKind exprKind) return "function in FROM"; case EXPR_KIND_WHERE: return 
"WHERE"; + case EXPR_KIND_QUALIFY: + return "QUALIFY"; case EXPR_KIND_POLICY: return "POLICY"; case EXPR_KIND_HAVING: diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index fb306c05112..0bb8371cdd9 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -2690,6 +2690,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location) case EXPR_KIND_WHERE: errkind = true; break; + case EXPR_KIND_QUALIFY: + errkind = true; + break; case EXPR_KIND_POLICY: err = _("set-returning functions are not allowed in policy expressions"); break; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4133c404a6b..9da8f9a27b2 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2300,6 +2300,7 @@ typedef struct SelectStmt bool groupByAll; /* Is this GROUP BY ALL? */ Node *havingClause; /* HAVING conditional-expression */ List *windowClause; /* WINDOW window_name AS (...), ... 
*/ + Node *qualifyClause; /* QUALIFY conditional-expression */ /* * In a "leaf" node representing a VALUES list, the above fields are all diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 51ead54f015..a83545931c1 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -370,6 +370,7 @@ PG_KEYWORD("program", PROGRAM, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("properties", PROPERTIES, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("property", PROPERTY, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("publication", PUBLICATION, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("qualify", QUALIFY, RESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("quote", QUOTE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("quotes", QUOTES, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("range", RANGE, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f7f4ba6c2a8..2443a1192d4 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -45,6 +45,7 @@ typedef enum ParseExprKind EXPR_KIND_FROM_FUNCTION, /* function in FROM clause */ EXPR_KIND_WHERE, /* WHERE */ EXPR_KIND_HAVING, /* HAVING */ + EXPR_KIND_QUALIFY, /* QUALIFY */ EXPR_KIND_FILTER, /* FILTER */ EXPR_KIND_WINDOW_PARTITION, /* window definition PARTITION BY */ EXPR_KIND_WINDOW_ORDER, /* window definition ORDER BY */ diff --git a/src/test/regress/expected/qualify.out b/src/test/regress/expected/qualify.out new file mode 100644 index 00000000000..ae55a556449 --- /dev/null +++ b/src/test/regress/expected/qualify.out @@ -0,0 +1,1122 @@ +-- +-- QUALIFY clause tests +-- +CREATE TEMPORARY TABLE empsalary ( + depname varchar, + empno bigint, + salary int, + enroll_date date +); +INSERT INTO empsalary VALUES +('develop', 10, 5200, '2007-08-01'), +('sales', 1, 5000, '2006-10-01'), +('personnel', 5, 3500, '2007-12-10'), +('sales', 4, 4800, '2007-08-08'), +('personnel', 2, 3900, '2006-12-23'), +('develop', 7, 4200, '2008-01-01'), 
+('develop', 9, 4500, '2008-01-01'), +('sales', 3, 4800, '2007-08-01'), +('develop', 8, 6000, '2006-10-01'), +('develop', 11, 5200, '2007-08-15'); +-- +-- Basic QUALIFY functionality +-- +-- Simple QUALIFY with inline window function +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) +FROM empsalary +QUALIFY rank() OVER (PARTITION BY depname ORDER BY salary DESC) <= 2; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 2) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) +FROM empsalary +QUALIFY rank() OVER (PARTITION BY depname ORDER BY salary DESC) <= 2 +ORDER BY depname, salary DESC, empno; + depname | empno | salary | rank +-----------+-------+--------+------ + develop | 8 | 6000 | 1 + develop | 10 | 5200 | 2 + develop | 11 | 5200 | 2 + personnel | 2 | 3900 | 1 + personnel | 5 | 3500 | 2 + sales | 1 | 5000 | 1 + sales | 3 | 4800 | 2 + sales | 4 | 4800 | 2 +(8 rows) + +-- QUALIFY with named window +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER w +FROM empsalary +WINDOW w AS (PARTITION BY depname ORDER BY salary DESC) +QUALIFY rank() OVER w <= 2; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + WindowAgg + Window: w AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w <= 2) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT depname, empno, salary, + rank() OVER w +FROM empsalary +WINDOW w AS (PARTITION BY depname ORDER BY salary DESC) +QUALIFY 
rank() OVER w <= 2 +ORDER BY depname, salary DESC, empno; + depname | empno | salary | rank +-----------+-------+--------+------ + develop | 8 | 6000 | 1 + develop | 10 | 5200 | 2 + develop | 11 | 5200 | 2 + personnel | 2 | 3900 | 1 + personnel | 5 | 3500 | 2 + sales | 1 | 5000 | 1 + sales | 3 | 4800 | 2 + sales | 4 | 4800 | 2 +(8 rows) + +-- +-- Alias resolution tests +-- +-- Reference alias defined in SELECT +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 2) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 +ORDER BY depname, salary DESC, empno; + depname | empno | salary | rnk +-----------+-------+--------+----- + develop | 8 | 6000 | 1 + develop | 10 | 5200 | 2 + develop | 11 | 5200 | 2 + personnel | 2 | 3900 | 1 + personnel | 5 | 3500 | 2 + sales | 1 | 5000 | 1 + sales | 3 | 4800 | 2 + sales | 4 | 4800 | 2 +(8 rows) + +-- Reference alias with row_number +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 < 3) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +-- Multiple aliases +EXPLAIN (COSTS 
OFF) +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rn = 1 OR rnk <= 2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: ((pg_qualify.rn = 1) OR (pg_qualify.rnk <= 2)) + -> WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(7 rows) + +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rn = 1 OR rnk <= 2 +ORDER BY depname, salary DESC; + depname | empno | salary | rn | rnk +-----------+-------+--------+----+----- + develop | 8 | 6000 | 1 | 1 + develop | 10 | 5200 | 2 | 2 + develop | 11 | 5200 | 3 | 2 + personnel | 2 | 3900 | 1 | 1 + personnel | 5 | 3500 | 2 | 2 + sales | 1 | 5000 | 1 | 1 + sales | 4 | 4800 | 2 | 2 + sales | 3 | 4800 | 3 | 2 +(8 rows) + +-- +-- Run condition optimization tests +-- +-- row_number with < operator (should use run condition) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 < 3) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +-- row_number with <= operator +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn <= 2; + 
QUERY PLAN +--------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 2) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn <= 2; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +-- rank with <= operator +EXPLAIN (COSTS OFF) +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + empno | salary | r +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- dense_rank with = operator (should convert to <= for run condition) +EXPLAIN (COSTS OFF) +SELECT empno, salary, dense_rank() OVER (ORDER BY salary DESC) AS dr +FROM empsalary +QUALIFY dr = 1; + QUERY PLAN +---------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.dr = 1) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (dense_rank() OVER w1 <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(8 rows) + +SELECT empno, salary, dense_rank() OVER (ORDER BY salary DESC) AS dr +FROM empsalary +QUALIFY dr = 1; + empno | salary | dr +-------+--------+---- + 8 | 6000 | 1 +(1 row) + +-- count(*) with <= operator +EXPLAIN (COSTS OFF) +SELECT empno, salary, count(*) OVER (ORDER BY salary DESC) AS c +FROM empsalary +QUALIFY c <= 3; + QUERY PLAN +--------------------------------------------- 
+ WindowAgg + Window: w1 AS (ORDER BY empsalary.salary) + Run Condition: (count(*) OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, salary, count(*) OVER (ORDER BY salary DESC) AS c +FROM empsalary +QUALIFY c <= 3; + empno | salary | c +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 3 + 11 | 5200 | 3 +(3 rows) + +-- +-- WHERE pushdown tests +-- Quals on PARTITION BY columns without window functions can be pushed to WHERE +-- +-- depname is in PARTITION BY, should be pushed to WHERE (SeqScan filter) +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 AND depname = 'develop'; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 2) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary + Filter: ((depname)::text = 'develop'::text) +(7 rows) + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 AND depname = 'develop' +ORDER BY salary DESC; + depname | empno | salary | rnk +---------+-------+--------+----- + develop | 8 | 6000 | 1 + develop | 10 | 5200 | 2 + develop | 11 | 5200 | 2 +(3 rows) + +-- Mixed: depname pushed to WHERE, rank condition stays as run condition +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY depname = 'sales' AND rnk = 1; + QUERY PLAN +---------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rnk = 1) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on 
empsalary + Filter: ((depname)::text = 'sales'::text) +(9 rows) + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY depname = 'sales' AND rnk = 1; + depname | empno | salary | rnk +---------+-------+--------+----- + sales | 1 | 5000 | 1 +(1 row) + +-- +-- QUALIFY without window function in SELECT list +-- The window function appears only in QUALIFY +-- +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.qualify4 = 1) + -> WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 1) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(8 rows) + +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 +ORDER BY depname; + depname | empno | salary +-----------+-------+-------- + develop | 8 | 6000 + personnel | 2 | 3900 + sales | 1 | 5000 +(3 rows) + +-- With PARTITION BY column filter pushed down +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 + AND depname = 'develop'; + QUERY PLAN +---------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.qualify4 = 1) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary + Filter: ((depname)::text = 'develop'::text) +(9 rows) + +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() 
OVER (PARTITION BY depname ORDER BY salary DESC) = 1 + AND depname = 'develop'; + depname | empno | salary +---------+-------+-------- + develop | 8 | 6000 +(1 row) + +-- +-- Multiple window functions with different windows +-- +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary) AS rn_asc, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn_desc +FROM empsalary +QUALIFY rn_asc = 1 OR rn_desc = 1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: ((pg_qualify.rn_asc = 1) OR (pg_qualify.rn_desc = 1)) + -> WindowAgg + Window: w2 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + -> Incremental Sort + Sort Key: empsalary.depname, empsalary.salary + Presorted Key: empsalary.depname + -> WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(12 rows) + +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary) AS rn_asc, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn_desc +FROM empsalary +QUALIFY rn_asc = 1 OR rn_desc = 1 +ORDER BY depname, empno; + depname | empno | salary | rn_asc | rn_desc +-----------+-------+--------+--------+--------- + develop | 7 | 4200 | 1 | 5 + develop | 8 | 6000 | 5 | 1 + personnel | 2 | 3900 | 2 | 1 + personnel | 5 | 3500 | 1 | 2 + sales | 1 | 5000 | 3 | 1 + sales | 4 | 4800 | 1 | 2 +(6 rows) + +-- +-- QUALIFY with aggregate and window functions +-- +EXPLAIN (COSTS OFF) +SELECT depname, sum(salary) AS total, + rank() OVER (ORDER BY sum(salary) DESC) AS rnk +FROM empsalary +GROUP BY depname +QUALIFY rnk <= 2; + QUERY PLAN +----------------------------------------------------------------------------------- + 
Subquery Scan on pg_qualify + -> WindowAgg + Window: w1 AS (ORDER BY (sum(empsalary.salary)) ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 2) + -> Sort + Sort Key: (sum(empsalary.salary)) DESC + -> HashAggregate + Group Key: empsalary.depname + -> Seq Scan on empsalary +(9 rows) + +SELECT depname, sum(salary) AS total, + rank() OVER (ORDER BY sum(salary) DESC) AS rnk +FROM empsalary +GROUP BY depname +QUALIFY rnk <= 2; + depname | total | rnk +---------+-------+----- + develop | 25100 | 1 + sales | 14600 | 2 +(2 rows) + +-- +-- QUALIFY in a grouped query without window functions. +-- The condition is validated like a SELECT-list item, so aggregates and +-- grouping columns are allowed (and behave as HAVING would). +-- +-- Aggregate used directly in QUALIFY filters groups like HAVING +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY count(*) > 2 +ORDER BY depname; + depname | c +---------+--- + develop | 5 + sales | 3 +(2 rows) + +-- A grouping column may be referenced directly in QUALIFY +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY depname <> 'sales' +ORDER BY depname; + depname | c +-----------+--- + develop | 5 + personnel | 2 +(2 rows) + +-- +-- Comparison with equivalent subquery (results should match) +-- +-- Subquery version +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r + FROM empsalary) emp +WHERE r <= 3; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT * FROM + (SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r + FROM empsalary) emp +WHERE r <= 3 +ORDER BY salary DESC, empno; + empno | salary | r +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- 
QUALIFY version (should produce same results) +EXPLAIN (COSTS OFF) +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3 +ORDER BY salary DESC, empno; + empno | salary | r +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- +-- QUALIFY with different comparison operators +-- +-- Greater than (no run condition optimization for increasing functions) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn > 7; + QUERY PLAN +--------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rn > 7) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(7 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn > 7 +ORDER BY empno; + empno | rn +-------+---- + 9 | 8 + 10 | 9 + 11 | 10 +(3 rows) + +-- Equality +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn = 5; + QUERY PLAN +--------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rn = 5) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 5) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(8 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn = 5; + empno | rn +-------+---- + 5 | 
5 +(1 row) + +-- BETWEEN (should keep as filter) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn BETWEEN 3 AND 5; + QUERY PLAN +--------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rn >= 3) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 5) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(8 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn BETWEEN 3 AND 5 +ORDER BY empno; + empno | rn +-------+---- + 3 | 3 + 4 | 4 + 5 | 5 +(3 rows) + +-- IN list +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn IN (1, 3, 5); + QUERY PLAN +--------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rn = ANY ('{1,3,5}'::bigint[])) + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.empno ROWS UNBOUNDED PRECEDING) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(7 rows) + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn IN (1, 3, 5) +ORDER BY empno; + empno | rn +-------+---- + 1 | 1 + 3 | 3 + 5 | 5 +(3 rows) + +-- +-- QUALIFY with expressions +-- +EXPLAIN (COSTS OFF) +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 10 / 2; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 5) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 10 / 2; + empno | salary | rn +-------+--------+---- + 8 | 6000 | 1 + 10 | 5200 | 2 
+ 11 | 5200 | 3 + 1 | 5000 | 4 + 4 | 4800 | 5 +(5 rows) + +-- +-- Error cases +-- +-- Aggregate in QUALIFY without GROUP BY: just as in the SELECT list, the +-- non-aggregated output columns make this an error (must appear in GROUP BY). +SELECT depname, empno, salary +FROM empsalary +QUALIFY sum(salary) > 5000; +ERROR: column "empsalary.depname" must appear in the GROUP BY clause or be used in an aggregate function +LINE 1: SELECT depname, empno, salary + ^ +-- QUALIFY in a grouped query referencing an ungrouped column (should error, +-- just as such a reference would in the SELECT list) +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY salary > 100; +ERROR: column "empsalary.salary" must appear in the GROUP BY clause or be used in an aggregate function +LINE 4: QUALIFY salary > 100; + ^ +-- QUALIFY with set-returning function (should error) +SELECT empno, salary +FROM empsalary +QUALIFY generate_series(1, 3) > 1; +ERROR: set-returning functions are not allowed in QUALIFY +LINE 3: QUALIFY generate_series(1, 3) > 1; + ^ +-- +-- QUALIFY in different query structures +-- +-- With DISTINCT +EXPLAIN (COSTS OFF) +SELECT DISTINCT depname, + first_value(empno) OVER (PARTITION BY depname ORDER BY salary DESC) AS top_emp +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Unique + -> Incremental Sort + Sort Key: pg_qualify.depname, pg_qualify.top_emp + Presorted Key: pg_qualify.depname + -> Subquery Scan on pg_qualify + Filter: (pg_qualify.qualify3 = 1) + -> WindowAgg + Window: w1 AS (PARTITION BY empsalary.depname ORDER BY empsalary.salary) + Run Condition: (row_number() OVER w1 <= 1) + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(12 rows) + +SELECT DISTINCT depname, + first_value(empno) OVER (PARTITION BY depname ORDER BY salary DESC) AS top_emp +FROM 
empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 +ORDER BY depname; + depname | top_emp +-----------+--------- + develop | 8 + personnel | 2 + sales | 1 +(3 rows) + +-- With ORDER BY +EXPLAIN (COSTS OFF) +SELECT empno, salary, + rank() OVER (ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 3; + QUERY PLAN +---------------------------------------------------------------------- + WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (rank() OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +SELECT empno, salary, + rank() OVER (ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 3 +ORDER BY salary DESC, empno; + empno | salary | rnk +-------+--------+----- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- With LIMIT (QUALIFY is evaluated before LIMIT) +EXPLAIN (COSTS OFF) +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 5 +LIMIT 3; + QUERY PLAN +---------------------------------------------------------------------------- + Limit + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 5) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(7 rows) + +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 5 +ORDER BY salary DESC +LIMIT 3; + empno | salary | rn +-------+--------+---- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 3 +(3 rows) + +-- +-- Verify evaluation order: QUALIFY happens after window functions, before DISTINCT +-- +-- This should first compute row_number, then filter, then apply DISTINCT +EXPLAIN (COSTS OFF) +SELECT DISTINCT salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 3; + QUERY PLAN +---------------------------------------------------------------------------- + 
HashAggregate + Group Key: empsalary.salary, row_number() OVER w1 + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.salary ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w1 <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(8 rows) + +SELECT DISTINCT salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 3; + salary | rn +--------+---- + 5200 | 2 + 5200 | 3 + 6000 | 1 +(3 rows) + +-- QUALIFY with FILTERS over window function +EXPLAIN(COSTS OFF) SELECT sum(salary), row_number() OVER (ORDER BY depname) as rank, sum( + sum(salary) FILTER (WHERE enroll_date > '2007-01-01') +) FILTER (WHERE depname <> 'sales') OVER (ORDER BY depname DESC) AS "filtered_sum", + depname +FROM empsalary GROUP BY depname QUALIFY rank = 1; + QUERY PLAN +----------------------------------------------------------------------------- + Subquery Scan on pg_qualify + Filter: (pg_qualify.rank = 1) + -> WindowAgg + Window: w2 AS (ORDER BY empsalary.depname ROWS UNBOUNDED PRECEDING) + Run Condition: (row_number() OVER w2 <= 1) + -> Sort + Sort Key: empsalary.depname + -> WindowAgg + Window: w1 AS (ORDER BY empsalary.depname) + -> Sort + Sort Key: empsalary.depname DESC + -> HashAggregate + Group Key: empsalary.depname + -> Seq Scan on empsalary +(14 rows) + +SELECT sum(salary), row_number() OVER (ORDER BY depname) as rank, sum( + sum(salary) FILTER (WHERE enroll_date > '2007-01-01') +) FILTER (WHERE depname <> 'sales') OVER (ORDER BY depname DESC) AS "filtered_sum", + depname +FROM empsalary GROUP BY depname QUALIFY rank = 1; + sum | rank | filtered_sum | depname +-------+------+--------------+--------- + 25100 | 1 | 22600 | develop +(1 row) + +-- +-- Aggregate functions directly in QUALIFY +-- (allowed in a grouped query, the same as in the SELECT list / HAVING) +-- +-- Aggregate condition over the group +EXPLAIN (COSTS OFF) +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY sum(salary) > 15000; 
+ QUERY PLAN +------------------------------------------------- + Subquery Scan on pg_qualify + -> HashAggregate + Group Key: empsalary.depname + Filter: (sum(empsalary.salary) > 15000) + -> Seq Scan on empsalary +(5 rows) + +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY sum(salary) > 15000 +ORDER BY depname; + depname | total +---------+------- + develop | 25100 +(1 row) + +-- Aggregate and window function combined in QUALIFY +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY rank() OVER (ORDER BY sum(salary) DESC) = 1 +ORDER BY depname; + depname | total +---------+------- + develop | 25100 +(1 row) + +-- Whole-table aggregate (single group) +SELECT sum(salary) AS s +FROM empsalary +QUALIFY sum(salary) > 1000; + s +------- + 47100 +(1 row) + +-- +-- Name resolution tests +-- +-- An input column takes precedence over a same-named select-list alias +-- "salary" in QUALIFY is the input column (> 4000 keeps most rows), not the +-- alias (which is 0 and would keep none) +SELECT empno, (empno * 0) AS salary +FROM empsalary +QUALIFY salary > 4000 +ORDER BY empno; + empno | salary +-------+-------- + 1 | 0 + 3 | 0 + 4 | 0 + 7 | 0 + 8 | 0 + 9 | 0 + 10 | 0 + 11 | 0 +(8 rows) + +-- QUALIFY referecing columns that not on output result +-- Same column +SELECT depname FROM empsalary QUALIFY salary > 0 ORDER BY salary; + depname +----------- + personnel + personnel + develop + develop + sales + sales + sales + develop + develop + develop +(10 rows) + +-- Different column +SELECT depname FROM empsalary QUALIFY salary > 0 ORDER BY enroll_date; + depname +----------- + sales + develop + personnel + sales + develop + sales + develop + personnel + develop + develop +(10 rows) + +-- +-- Name resolution error cases +-- +-- A non-deterministic alias expression cannot be referenced from QUALIFY +SELECT empno, (random() < 2)::int AS r +FROM empsalary +QUALIFY r = 1; +ERROR: QUALIFY cannot reference output column "r" whose 
expression is non-deterministic +LINE 3: QUALIFY r = 1; + ^ +-- An ambiguous alias reference is rejected +SELECT empno AS x, salary AS x, + row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY x > 0; +ERROR: column reference "x" is ambiguous +LINE 4: QUALIFY x > 0; + ^ +-- A name that is neither an input column nor a select-list alias +SELECT empno +FROM empsalary +QUALIFY nosuchcol > row_number() OVER (ORDER BY empno); +ERROR: column "nosuchcol" does not exist +LINE 3: QUALIFY nosuchcol > row_number() OVER (ORDER BY empno); + ^ +-- +-- QUALIFY without any window function: a general post-SELECT filter +-- +EXPLAIN (COSTS OFF) +SELECT empno, salary, salary * 2 as new_salary +FROM empsalary +QUALIFY new_salary > 10000; + QUERY PLAN +---------------------------------- + Seq Scan on empsalary + Filter: ((salary * 2) > 10000) +(2 rows) + +SELECT empno, salary, salary * 2 as new_salary +FROM empsalary +QUALIFY new_salary > 10000 +ORDER BY empno; + empno | salary | new_salary +-------+--------+------------ + 8 | 6000 | 12000 + 10 | 5200 | 10400 + 11 | 5200 | 10400 +(3 rows) + +-- +-- QUALIFY in other query structures +-- +-- In the arms of a set operation +SELECT empno FROM empsalary +QUALIFY row_number() OVER (ORDER BY salary) = 1 +UNION ALL +SELECT empno FROM empsalary +QUALIFY row_number() OVER (ORDER BY salary DESC) = 1 +ORDER BY empno; + empno +------- + 5 + 8 +(2 rows) + +-- In a subquery that carries an outer reference +SELECT depname, + (SELECT empno FROM empsalary e2 + WHERE e2.depname = e1.depname + QUALIFY row_number() OVER (ORDER BY salary DESC) = 1) AS top_emp +FROM (SELECT DISTINCT depname FROM empsalary) e1 +ORDER BY depname; + depname | top_emp +-----------+--------- + develop | 8 + personnel | 2 + sales | 1 +(3 rows) + +-- Combined with row-level locking (the lock applies to the base table) +SELECT empno FROM empsalary +QUALIFY salary > 5000 +ORDER BY empno +FOR UPDATE; + empno +------- + 8 + 10 + 11 +(3 rows) + +-- +-- Cleanup +-- +DROP 
TABLE empsalary; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 8fa0a6c47fb..278554a7307 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -102,7 +102,7 @@ test: publication subscription # Another group of parallel tests # select_views depends on create_view # ---------- -test: select_views portals_p2 foreign_key dependency guc bitmapops combocid tsearch tsdicts foreign_data window xmlmap functional_deps advisory_lock indirect_toast equivclass stats_rewrite graph_table +test: select_views portals_p2 foreign_key dependency guc bitmapops combocid tsearch tsdicts foreign_data window qualify xmlmap functional_deps advisory_lock indirect_toast equivclass stats_rewrite graph_table # ---------- # Another group of parallel tests (JSON related) diff --git a/src/test/regress/sql/qualify.sql b/src/test/regress/sql/qualify.sql new file mode 100644 index 00000000000..c9bf99e23fe --- /dev/null +++ b/src/test/regress/sql/qualify.sql @@ -0,0 +1,563 @@ +-- +-- QUALIFY clause tests +-- + +CREATE TEMPORARY TABLE empsalary ( + depname varchar, + empno bigint, + salary int, + enroll_date date +); + +INSERT INTO empsalary VALUES +('develop', 10, 5200, '2007-08-01'), +('sales', 1, 5000, '2006-10-01'), +('personnel', 5, 3500, '2007-12-10'), +('sales', 4, 4800, '2007-08-08'), +('personnel', 2, 3900, '2006-12-23'), +('develop', 7, 4200, '2008-01-01'), +('develop', 9, 4500, '2008-01-01'), +('sales', 3, 4800, '2007-08-01'), +('develop', 8, 6000, '2006-10-01'), +('develop', 11, 5200, '2007-08-15'); + +-- +-- Basic QUALIFY functionality +-- + +-- Simple QUALIFY with inline window function +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) +FROM empsalary +QUALIFY rank() OVER (PARTITION BY depname ORDER BY salary DESC) <= 2; + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) +FROM empsalary +QUALIFY rank() OVER 
(PARTITION BY depname ORDER BY salary DESC) <= 2 +ORDER BY depname, salary DESC, empno; + +-- QUALIFY with named window +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER w +FROM empsalary +WINDOW w AS (PARTITION BY depname ORDER BY salary DESC) +QUALIFY rank() OVER w <= 2; + +SELECT depname, empno, salary, + rank() OVER w +FROM empsalary +WINDOW w AS (PARTITION BY depname ORDER BY salary DESC) +QUALIFY rank() OVER w <= 2 +ORDER BY depname, salary DESC, empno; + +-- +-- Alias resolution tests +-- + +-- Reference alias defined in SELECT +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2; + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 +ORDER BY depname, salary DESC, empno; + +-- Reference alias with row_number +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + +-- Multiple aliases +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rn = 1 OR rnk <= 2; + +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rn = 1 OR rnk <= 2 +ORDER BY depname, salary DESC; + +-- +-- Run condition optimization tests +-- + +-- row_number with < operator (should use run condition) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn < 3; + +-- row_number with <= operator +EXPLAIN (COSTS OFF) +SELECT 
empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn <= 2; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn <= 2; + +-- rank with <= operator +EXPLAIN (COSTS OFF) +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + +-- dense_rank with = operator (should convert to <= for run condition) +EXPLAIN (COSTS OFF) +SELECT empno, salary, dense_rank() OVER (ORDER BY salary DESC) AS dr +FROM empsalary +QUALIFY dr = 1; + +SELECT empno, salary, dense_rank() OVER (ORDER BY salary DESC) AS dr +FROM empsalary +QUALIFY dr = 1; + +-- count(*) with <= operator +EXPLAIN (COSTS OFF) +SELECT empno, salary, count(*) OVER (ORDER BY salary DESC) AS c +FROM empsalary +QUALIFY c <= 3; + +SELECT empno, salary, count(*) OVER (ORDER BY salary DESC) AS c +FROM empsalary +QUALIFY c <= 3; + +-- +-- WHERE pushdown tests +-- Quals on PARTITION BY columns without window functions can be pushed to WHERE +-- + +-- depname is in PARTITION BY, should be pushed to WHERE (SeqScan filter) +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 AND depname = 'develop'; + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 2 AND depname = 'develop' +ORDER BY salary DESC; + +-- Mixed: depname pushed to WHERE, rank condition stays as run condition +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY depname = 'sales' AND rnk = 1; + +SELECT depname, empno, salary, + rank() OVER (PARTITION BY depname ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY depname = 'sales' AND rnk = 1; + +-- +-- QUALIFY without window function in SELECT list +-- The window 
function appears only in QUALIFY +-- + +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1; + +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 +ORDER BY depname; + +-- Same query with an additional filter on the PARTITION BY column, which should be pushed down +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 + AND depname = 'develop'; + +SELECT depname, empno, salary +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 + AND depname = 'develop'; + +-- +-- Multiple window functions with different windows, combined with OR in QUALIFY +-- + +EXPLAIN (COSTS OFF) +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary) AS rn_asc, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn_desc +FROM empsalary +QUALIFY rn_asc = 1 OR rn_desc = 1; + +SELECT depname, empno, salary, + row_number() OVER (PARTITION BY depname ORDER BY salary) AS rn_asc, + row_number() OVER (PARTITION BY depname ORDER BY salary DESC) AS rn_desc +FROM empsalary +QUALIFY rn_asc = 1 OR rn_desc = 1 +ORDER BY depname, empno; + +-- +-- QUALIFY on a window function computed over an aggregate in a grouped query +-- + +EXPLAIN (COSTS OFF) +SELECT depname, sum(salary) AS total, + rank() OVER (ORDER BY sum(salary) DESC) AS rnk +FROM empsalary +GROUP BY depname +QUALIFY rnk <= 2; + +SELECT depname, sum(salary) AS total, + rank() OVER (ORDER BY sum(salary) DESC) AS rnk +FROM empsalary +GROUP BY depname +QUALIFY rnk <= 2; + +-- +-- QUALIFY in a grouped query without window functions. +-- The condition is validated like a SELECT-list item, so aggregates and +-- grouping columns are allowed (and behave as HAVING would).
+-- + +-- Aggregate used directly in QUALIFY filters groups like HAVING +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY count(*) > 2 +ORDER BY depname; + +-- A grouping column may be referenced directly in QUALIFY +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY depname <> 'sales' +ORDER BY depname; + +-- +-- Comparison with equivalent subquery (plans and results should match) +-- + +-- Subquery version +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r + FROM empsalary) emp +WHERE r <= 3; + +SELECT * FROM + (SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r + FROM empsalary) emp +WHERE r <= 3 +ORDER BY salary DESC, empno; + +-- QUALIFY version (should produce the same plan and results as the subquery version) +EXPLAIN (COSTS OFF) +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3; + +SELECT empno, salary, rank() OVER (ORDER BY salary DESC) AS r +FROM empsalary +QUALIFY r <= 3 +ORDER BY salary DESC, empno; + +-- +-- QUALIFY with different comparison operators +-- + +-- Greater than (no run condition optimization for increasing functions) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn > 7; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn > 7 +ORDER BY empno; + +-- Equality +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn = 5; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn = 5; + +-- BETWEEN (should keep as filter; NOTE(review): BETWEEN is rn >= 3 AND rn <= 5, so confirm the upper bound does not become a run condition) +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn BETWEEN 3 AND 5; + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn BETWEEN 3 AND 5 +ORDER BY empno; + +-- IN list +EXPLAIN (COSTS OFF) +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn IN
(1, 3, 5); + +SELECT empno, row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY rn IN (1, 3, 5) +ORDER BY empno; + +-- +-- QUALIFY with a constant expression on the right-hand side of the comparison +-- + +EXPLAIN (COSTS OFF) +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 10 / 2; + +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 10 / 2; + +-- +-- Error cases +-- + +-- Aggregate in QUALIFY without GROUP BY: just as in the SELECT list, the +-- non-aggregated output columns make this an error (must appear in GROUP BY). +SELECT depname, empno, salary +FROM empsalary +QUALIFY sum(salary) > 5000; + +-- QUALIFY in a grouped query referencing an ungrouped column (should error, +-- just as such a reference would in the SELECT list) +SELECT depname, count(*) AS c +FROM empsalary +GROUP BY depname +QUALIFY salary > 100; + +-- QUALIFY with set-returning function (should error) +SELECT empno, salary +FROM empsalary +QUALIFY generate_series(1, 3) > 1; + +-- +-- QUALIFY in different query structures +-- + +-- With DISTINCT +EXPLAIN (COSTS OFF) +SELECT DISTINCT depname, + first_value(empno) OVER (PARTITION BY depname ORDER BY salary DESC) AS top_emp +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1; + +SELECT DISTINCT depname, + first_value(empno) OVER (PARTITION BY depname ORDER BY salary DESC) AS top_emp +FROM empsalary +QUALIFY row_number() OVER (PARTITION BY depname ORDER BY salary DESC) = 1 +ORDER BY depname; + +-- With ORDER BY (NOTE(review): the EXPLAIN below omits the ORDER BY; only the SELECT exercises it) +EXPLAIN (COSTS OFF) +SELECT empno, salary, + rank() OVER (ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 3; + +SELECT empno, salary, + rank() OVER (ORDER BY salary DESC) AS rnk +FROM empsalary +QUALIFY rnk <= 3 +ORDER BY salary DESC, empno; + +-- With LIMIT (QUALIFY is evaluated before LIMIT; NOTE(review): the EXPLAINed query has no ORDER BY, unlike the SELECT that follows it) +EXPLAIN (COSTS OFF) +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 5 +LIMIT
3; + +SELECT empno, salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 5 +ORDER BY salary DESC +LIMIT 3; + +-- +-- Verify evaluation order: QUALIFY happens after window functions, before DISTINCT +-- + +-- This should first compute row_number, then filter, then apply DISTINCT (the SELECT is ordered by rn so its output does not depend on the DISTINCT strategy) +EXPLAIN (COSTS OFF) +SELECT DISTINCT salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 3; + +SELECT DISTINCT salary, + row_number() OVER (ORDER BY salary DESC) AS rn +FROM empsalary +QUALIFY rn <= 3 ORDER BY rn; + +-- QUALIFY with FILTER clauses on the aggregate and window aggregate calls +EXPLAIN (COSTS OFF) SELECT sum(salary), row_number() OVER (ORDER BY depname) AS rank, sum( + sum(salary) FILTER (WHERE enroll_date > '2007-01-01') +) FILTER (WHERE depname <> 'sales') OVER (ORDER BY depname DESC) AS "filtered_sum", + depname +FROM empsalary GROUP BY depname QUALIFY rank = 1; + +SELECT sum(salary), row_number() OVER (ORDER BY depname) AS rank, sum( + sum(salary) FILTER (WHERE enroll_date > '2007-01-01') +) FILTER (WHERE depname <> 'sales') OVER (ORDER BY depname DESC) AS "filtered_sum", + depname +FROM empsalary GROUP BY depname QUALIFY rank = 1; + +-- +-- Aggregate functions directly in QUALIFY +-- (allowed in a grouped query, the same as in the SELECT list / HAVING) +-- + +-- Aggregate condition over the group +EXPLAIN (COSTS OFF) +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY sum(salary) > 15000; + +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY sum(salary) > 15000 +ORDER BY depname; + +-- Aggregate and window function combined in QUALIFY +SELECT depname, sum(salary) AS total +FROM empsalary +GROUP BY depname +QUALIFY rank() OVER (ORDER BY sum(salary) DESC) = 1 +ORDER BY depname; + +-- Whole-table aggregate (single group) +SELECT sum(salary) AS s +FROM empsalary +QUALIFY sum(salary) > 1000; + +-- +-- Name resolution tests +-- + +-- An input column takes precedence over a same-named
select-list alias +-- "salary" in QUALIFY is the input column (> 4000 keeps most rows), not the +-- alias (which is 0 and would keep none) +SELECT empno, (empno * 0) AS salary +FROM empsalary +QUALIFY salary > 4000 +ORDER BY empno; + +-- QUALIFY referencing columns that are not in the output result + +-- ORDER BY on the same non-output column that QUALIFY references +SELECT depname FROM empsalary QUALIFY salary > 0 ORDER BY salary; + +-- ORDER BY on a different non-output column +SELECT depname FROM empsalary QUALIFY salary > 0 ORDER BY enroll_date; + +-- +-- Name resolution error cases +-- + +-- A non-deterministic alias expression cannot be referenced from QUALIFY +SELECT empno, (random() < 2)::int AS r +FROM empsalary +QUALIFY r = 1; + +-- An ambiguous alias reference is rejected +SELECT empno AS x, salary AS x, + row_number() OVER (ORDER BY empno) AS rn +FROM empsalary +QUALIFY x > 0; + +-- A name that is neither an input column nor a select-list alias +SELECT empno +FROM empsalary +QUALIFY nosuchcol > row_number() OVER (ORDER BY empno); + +-- +-- QUALIFY without any window function: a general post-SELECT filter +-- + +EXPLAIN (COSTS OFF) +SELECT empno, salary, salary * 2 as new_salary +FROM empsalary +QUALIFY new_salary > 10000; + +SELECT empno, salary, salary * 2 as new_salary +FROM empsalary +QUALIFY new_salary > 10000 +ORDER BY empno; + +-- +-- QUALIFY in other query structures +-- + +-- In the arms of a set operation +SELECT empno FROM empsalary +QUALIFY row_number() OVER (ORDER BY salary) = 1 +UNION ALL +SELECT empno FROM empsalary +QUALIFY row_number() OVER (ORDER BY salary DESC) = 1 +ORDER BY empno; + +-- In a subquery that carries an outer reference +SELECT depname, + (SELECT empno FROM empsalary e2 + WHERE e2.depname = e1.depname + QUALIFY row_number() OVER (ORDER BY salary DESC) = 1) AS top_emp +FROM (SELECT DISTINCT depname FROM empsalary) e1 +ORDER BY depname; + +-- Combined with row-level locking (the lock applies to the base table) +SELECT empno FROM empsalary +QUALIFY salary > 5000 +ORDER BY empno +FOR UPDATE; + +-- +-- Cleanup
+-- +DROP TABLE empsalary; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c5db6ca6705..04eaaddc685 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2494,6 +2494,9 @@ QTNode QUERYTYPE QualCost QualItem +QualifyExtractContext +QualifyHookState +QualifyVarmapContext Query QueryCompletion QueryDesc -- 2.50.1 (Apple Git-155)