diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 000d757bdd..cf59ebfe87 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4421,8 +4421,14 @@ create_one_window_path(PlannerInfo *root, List *activeWindows) { PathTarget *window_target; + Query *parse = root->parse; ListCell *l; List *topqual = NIL; + List *window_pathkeys; + List *orderby_pathkeys = NIL; + int sort_pushdown_idx = -1; + int presorted_keys; + bool is_sorted; /* * Since each window clause could require a different sort order, we stack @@ -4441,17 +4447,99 @@ create_one_window_path(PlannerInfo *root, */ window_target = input_target; + /* + * Here we attempt to minimize the number of sorts which must be performed + * for queries with an ORDER BY clause. + * + * It's possible due to select_active_windows() putting the WindowClauses + * with the lowest tleSortGroupRef last in the activeWindows list that the + * final WindowClause has a subset of the sort requirements that the + * query's ORDER BY clause has. Below we try to detect this case and if + * we find this is true, we consider adjusting the sort that's done for + * WindowAggs and include the additional clauses so that no additional + * sorting is required for the query's ORDER BY clause. + * + * We don't try this optimization for the following cases: + * + * 1. If the query has a DISTINCT clause. We needn't waste any + * additional effort for the more strict sort order as if DISTINCT + * is done via Hash Aggregate then that's going to undo this work. + * + * 2. If the query has a LIMIT clause. The top-level sort will be + * able to use a top-n sort which might be more efficient than + * sorting by the additional columns. If the LIMIT does not reduce + * the number of rows significantly then this might not be true, but + * we don't try to consider that here. + * + * 3. If the top-level WindowClause has a runCondition then this can + * filter out tuples and make the final sort cheaper. If we pushed + * the sort down below the WindowAgg then we'd need to sort all rows + * including ones that the runCondition might filter. This may waste + * effort so we just don't try to push down the sort for this case. + * + * 4. If the ORDER BY contains any expressions containing WindowFuncs + * then we can't push down the sort as these obviously must be + * evaluated before they can be sorted. + */ + if (parse->sortClause != NIL && parse->distinctClause == NIL && + parse->limitCount == NULL && + linitial_node(WindowClause, activeWindows)->runCondition == NIL && + !contain_window_function((Node *) get_sortgrouplist_exprs(parse->sortClause, + root->processed_tlist))) + { + orderby_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + root->processed_tlist); + + /* + * Loop backwards over the WindowClauses looking for the first + * WindowClause which has pathkeys not contained in the + * orderby_pathkeys. What we're looking for here is the lowest level + * that we push the additional sort requirements down into. If we + * fail here then sort_pushdown_idx will remain at -1. + */ + foreach_reverse(l, activeWindows) + { + WindowClause *wc = lfirst_node(WindowClause, l); + + window_pathkeys = make_pathkeys_for_window(root, + wc, + root->processed_tlist); + + if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys)) + sort_pushdown_idx = foreach_current_index(l); + else + break; + } + } + foreach(l, activeWindows) { WindowClause *wc = lfirst_node(WindowClause, l); - List *window_pathkeys; - int presorted_keys; - bool is_sorted; bool topwindow; window_pathkeys = make_pathkeys_for_window(root, - wc, - root->processed_tlist); + wc, + root->processed_tlist); + + /* + * When we get to the sort_pushdown_idx WindowClause, if the path is + * not already correctly sorted, then just use the pathkeys for the + * query's ORDER BY clause instead of the WindowClause's pathkeys. + * When the path is already correctly sorted there's no point in + * adjusting the pathkeys as this just moves the sort down without + * actually reducing the number of sorts which are required in the + * plan overall. sort_pushdown_idx will be -1 and never match when + * this optimization was disabled above. + */ + if (foreach_current_index(l) == sort_pushdown_idx && + !pathkeys_contained_in(window_pathkeys, path->pathkeys)) + { + /* check to make sure sort_pushdown_idx was set correctly */ + Assert(pathkeys_contained_in(window_pathkeys, orderby_pathkeys)); + + window_pathkeys = orderby_pathkeys; + } is_sorted = pathkeys_count_contained_in(window_pathkeys, path->pathkeys, diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 529a382d28..0de9d73dac 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -378,14 +378,30 @@ lnext(const List *l, const ListCell *c) (cell = NULL, false); \ cell##__state.i++) +/* + * foreach_reverse - + * a convenience macro for looping through a list in reverse + * + * This is equivalent to foreach, only it loops over the list starting with + * the last element and ends on the first element. + */ +#define foreach_reverse(cell, lst) \ + for (ForEachState cell##__state = {(lst), cell##__state.l->length - 1}; \ + (cell##__state.l != NIL && \ + cell##__state.i >= 0) ? \ + (cell = &cell##__state.l->elements[cell##__state.i], true) : \ + (cell = NULL, false); \ + cell##__state.i--) + /* * foreach_delete_current - * delete the current list element from the List associated with a - * surrounding foreach() loop, returning the new List pointer. + * surrounding foreach() or foreach_reverse() loop, returning the new List +* pointer. * * This is equivalent to list_delete_cell(), but it also adjusts the foreach - * loop's state so that no list elements will be missed. Do not delete - * elements from an active foreach loop's list in any other way! + * or foreach_reverse loop's state so that no list elements will be missed. + * Do not delete elements from an active foreach loop's list in any other way! */ #define foreach_delete_current(lst, cell) \ (cell##__state.i--, \ @@ -393,8 +409,9 @@ lnext(const List *l, const ListCell *c) /* * foreach_current_index - - * get the zero-based list index of a surrounding foreach() loop's - * current element; pass the name of the "ListCell *" iterator variable. + * get the zero-based index within the list of the current element in the + * surrounding foreach() or foreach_reverse() loop; pass the name of the + * "ListCell *" iterator variable. * * Beware of using this after foreach_delete_current(); the value will be * out of sync for the rest of the current loop iteration. Anyway, since diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 90e89fb5b6..d83fea8091 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3982,7 +3982,194 @@ ORDER BY depname, empno; -> Seq Scan on empsalary (11 rows) +-- Do not perform sort pushdown if column is presorted +CREATE INDEX depname_idx ON empsalary(depname); +SET enable_seqscan=0; +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + QUERY PLAN +------------------------------------------------------- + Incremental Sort + Sort Key: depname, empno + Presorted Key: depname + -> WindowAgg + -> Index Scan using depname_idx on empsalary +(5 rows) + +-- Do perform sort pushdown if columns are only partially sorted +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname, empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +------------------------------------------------------- + WindowAgg + -> Incremental Sort + Sort Key: depname, empno, enroll_date + Presorted Key: depname + -> Index Scan using depname_idx on empsalary +(5 rows) + +DROP INDEX depname_idx; +RESET enable_seqscan; RESET enable_hashagg; +-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as +-- QUERY's ORDER BY to save additional sorting in the end +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (ORDER BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + QUERY PLAN +----------------------------------- + WindowAgg + -> Sort + Sort Key: depname, empno + -> Seq Scan on empsalary +(4 rows) + +-- Same as above, but PARTITION BY clause is included as well +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +----------------------------------------------- + WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> Seq Scan on empsalary +(4 rows) + +-- Negation of above, when sort optimization is not needed to be performed +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY empno, enroll_date; + QUERY PLAN +----------------------------------------- + Sort + Sort Key: empno, enroll_date + -> WindowAgg + -> Sort + Sort Key: depname, empno + -> Seq Scan on empsalary +(6 rows) + +-- ORDER BY's in multiple Window functions can be combined into one +-- if they are subset of QUERY's ORDER BY +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +------------------------------------------------------ + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(8 rows) + +-- order of window function shouldn't affect query's ORDER BY pushdown +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + count(*) OVER (ORDER BY enroll_date DESC) c, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +------------------------------------------------------ + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(8 rows) + +-- SORT OPTIMIZATION (as in cases above) is not needed to be performed +-- in cases, as mentioned below. +-- Case #1: When distinct clause is included +EXPLAIN (COSTS OFF) +SELECT DISTINCT empno, + depname, + min(salary) OVER (PARTITION BY depname) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Unique + -> Sort + Sort Key: depname, empno, (min(salary) OVER (?)), (sum(salary) OVER (?)), (count(*) OVER (?)) + -> WindowAgg + -> Sort + Sort Key: depname + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(10 rows) + +-- Case #2: When Limit clause is specified +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY empno) depminsalary, + sum(salary) OVER (PARTITION BY empno) depsalary +FROM empsalary +ORDER BY empno, depname LIMIT 1; + QUERY PLAN +----------------------------------------------- + Limit + -> Incremental Sort + Sort Key: empno, depname + Presorted Key: empno + -> WindowAgg + -> Sort + Sort Key: empno + -> Seq Scan on empsalary +(8 rows) + +-- Case #3: When ORDER BY contains any WindowFuncs +EXPLAIN (COSTS OFF) +SELECT empno, + row_number() over (PARTITION BY empno) esalary +FROM empsalary +ORDER BY empno, esalary; + QUERY PLAN +-------------------------------------------- + Incremental Sort + Sort Key: empno, (row_number() OVER (?)) + Presorted Key: empno + -> WindowAgg + -> Sort + Sort Key: empno + -> Seq Scan on empsalary +(7 rows) + -- Test Sort node reordering EXPLAIN (COSTS OFF) SELECT diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index b7bd0a83da..e989e93737 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -1324,8 +1324,102 @@ SELECT DISTINCT FROM empsalary ORDER BY depname, empno; +-- Do not perform sort pushdown if column is presorted +CREATE INDEX depname_idx ON empsalary(depname); +SET enable_seqscan=0; + +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + +-- Do perform sort pushdown if columns are only partially sorted +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname, empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +DROP INDEX depname_idx; +RESET enable_seqscan; RESET enable_hashagg; +-- If window function's ORDER BY is subset of QUERY's ORDER BY, sort as per as +-- QUERY's ORDER BY to save additional sorting in the end +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (ORDER BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + +-- Same as above, but PARTITION BY clause is included as well +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- Negation of above, when sort optimization is not needed to be performed +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY empno, enroll_date; + +-- ORDER BY's in multiple Window functions can be combined into one +-- if they are subset of QUERY's ORDER BY +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- order of window function shouldn't affect query's ORDER BY pushdown +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + count(*) OVER (ORDER BY enroll_date DESC) c, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- SORT OPTIMIZATION (as in cases above) is not needed to be performed +-- in cases, as mentioned below. + +-- Case #1: When distinct clause is included +EXPLAIN (COSTS OFF) +SELECT DISTINCT empno, + depname, + min(salary) OVER (PARTITION BY depname) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno; + +-- Case #2: When Limit clause is specified +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY empno) depminsalary, + sum(salary) OVER (PARTITION BY empno) depsalary +FROM empsalary +ORDER BY empno, depname LIMIT 1; + +-- Case #3: When ORDER BY contains any WindowFuncs +EXPLAIN (COSTS OFF) +SELECT empno, + row_number() over (PARTITION BY empno) esalary +FROM empsalary +ORDER BY empno, esalary; + -- Test Sort node reordering EXPLAIN (COSTS OFF) SELECT