diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 29ae32d960..9db1c34188 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -138,6 +138,7 @@ bool enable_indexonlyscan = true; bool enable_bitmapscan = true; bool enable_tidscan = true; bool enable_sort = true; +bool enable_sort_pushdown = true; bool enable_incremental_sort = true; bool enable_hashagg = true; bool enable_nestloop = true; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 05f44faf6e..0d2381f620 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4513,8 +4513,11 @@ create_one_window_path(PlannerInfo *root, List *activeWindows) { PathTarget *window_target; + Query *parse = root->parse; ListCell *l; List *topqual = NIL; + List *orderby_pathkeys = NIL; + int sort_pushdown_idx = -1; /* * Since each window clause could require a different sort order, we stack @@ -4533,6 +4536,75 @@ create_one_window_path(PlannerInfo *root, */ window_target = input_target; + /* + * Here we attempt to minimize the number of sorts which must be performed + * for queries with an ORDER BY clause. + * + * It's possible due to select_active_windows() putting the WindowClauses + * with the lowest tleSortGroupRef last in the activeWindows list that the + * final WindowClause has a subset of the sort requirements that the + * query's ORDER BY clause has. Below we try to detect this case and if + * we find this is true, we consider adjusting the sort that's done for + * WindowAggs and include the additional clauses so that no additional + * sorting is required for the query's ORDER BY clause. + * + * We don't try this optimization for the following cases: + * + * 1. If the query has a DISTINCT clause. We needn't waste any additional + * effort for the more strict sort order as if DISTINCT is done via Hash + * Aggregate then that's going to undo this work. + * + * 2. If the query has a LIMIT clause. The top-level sort will be able to + * use a top-n sort which might be more efficient than sorting by the + * additional columns. If the LIMIT does not reduce the number of rows + * significantly then this might not be true, but we don't try to consider + * that here. + * + * 3. If the top-level WindowClause has a runCondition then this can + * filter out tuples and make the final sort cheaper. If we pushed the + * sort down below the WindowAgg then we'd need to sort all rows including + * ones that the runCondition might filter out. This may waste effort so + * we just don't try to push down the sort for this case. + * + * 4. If the ORDER BY contains any expressions containing WindowFuncs then + * we can't push down the sort as these obviously must be evaluated before + * they can be sorted. + */ + if (enable_sort_pushdown && parse->sortClause != NIL && parse->distinctClause == NIL && + parse->limitCount == NULL && + llast_node(WindowClause, activeWindows)->runCondition == NIL && + !contain_window_function((Node *) get_sortgrouplist_exprs(parse->sortClause, + root->processed_tlist))) + { + orderby_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + root->processed_tlist); + + /* + * Loop backwards over the WindowClauses looking for the first + * WindowClause which has pathkeys not contained in the + * orderby_pathkeys. What we're looking for here is the lowest level + * that we push the additional sort requirements down into. If we + * can't find any WindowClause with suitable pathkeys here then + * sort_pushdown_idx will remain at -1 which will effectively disable + * the pushdown attempt later in this function. + */ + for (int i = list_length(activeWindows) - 1; i >= 0; i--) + { + WindowClause *wc = list_nth_node(WindowClause, activeWindows, i); + List *window_pathkeys; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + root->processed_tlist); + + if (pathkeys_contained_in(window_pathkeys, orderby_pathkeys)) + sort_pushdown_idx = i; + else + break; + } + } + foreach(l, activeWindows) { WindowClause *wc = lfirst_node(WindowClause, l); @@ -4545,6 +4617,25 @@ create_one_window_path(PlannerInfo *root, wc, root->processed_tlist); + /* + * When we get to the sort_pushdown_idx WindowClause, if the path is + * not already correctly sorted, then just use the pathkeys for the + * query's ORDER BY clause instead of the WindowClause's pathkeys. + * When the path is already correctly sorted there's no point in + * adjusting the pathkeys as this just moves the sort down without + * actually reducing the number of sorts which are required in the + * plan overall. sort_pushdown_idx will be -1 and never match when + * this optimization was disabled above. + */ + if (foreach_current_index(l) == sort_pushdown_idx && + !pathkeys_contained_in(window_pathkeys, path->pathkeys)) + { + /* check to make sure sort_pushdown_idx was set correctly */ + Assert(pathkeys_contained_in(window_pathkeys, orderby_pathkeys)); + + window_pathkeys = orderby_pathkeys; + } + is_sorted = pathkeys_count_contained_in(window_pathkeys, path->pathkeys, &presorted_keys); diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4ac808ed22..2bd952c612 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -845,6 +845,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_sort_pushdown", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's ability to push ORDER BY below WindowClauses."), + NULL, + GUC_EXPLAIN + }, + &enable_sort_pushdown, + true, + NULL, NULL, NULL + }, { {"enable_incremental_sort", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of incremental sort steps."), diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 6cf49705d3..2448e59715 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -55,6 +55,7 @@ extern PGDLLIMPORT bool enable_indexonlyscan; extern PGDLLIMPORT bool enable_bitmapscan; extern PGDLLIMPORT bool enable_tidscan; extern PGDLLIMPORT bool enable_sort; +extern PGDLLIMPORT bool enable_sort_pushdown; extern PGDLLIMPORT bool enable_incremental_sort; extern PGDLLIMPORT bool enable_hashagg; extern PGDLLIMPORT bool enable_nestloop; diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 26e2df6da5..f50edc7583 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3984,6 +3984,226 @@ ORDER BY depname, empno; -> Seq Scan on empsalary (12 rows) +CREATE INDEX empsalary_depname_idx ON empsalary(depname); +SET enable_seqscan TO off; +-- Ensure we don't push the ORDER BY sort below the WindowAgg when the input +-- node to the WindowAgg is already sorted correctly. This would just move +-- the sort down and not reduce the number of sorts. +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + QUERY PLAN +----------------------------------------------------------------- + Incremental Sort + Sort Key: depname, empno + Presorted Key: depname + -> WindowAgg + -> Index Scan using empsalary_depname_idx on empsalary +(5 rows) + +DROP INDEX empsalary_depname_idx; +RESET enable_seqscan; +-- Ensure we push the ORDER BY sort below the WindowAgg +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname, empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +----------------------------------------------- + WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> Seq Scan on empsalary +(4 rows) + +-- A more complex permutation of the above. Ensure the ORDER BY sort is +-- pushed down below the WindowAgg when there is a PARTITION BY and ORDER BY +-- clause in the WindowFunc +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +----------------------------------------------- + WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> Seq Scan on empsalary +(4 rows) + +-- Try more complex permutations of pushing the ORDER BY's sort below the +-- WindowAgg. With multiple WindowAggs sharing the same sort, ensure we push +-- the sort to the correct level. +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +------------------------------------------------------ + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(8 rows) + +-- As above, but swap the order of the WindowFuncs to ensure their order is +-- not a factor in the plan choice. +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + count(*) OVER (ORDER BY enroll_date DESC) c, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + QUERY PLAN +------------------------------------------------------ + WindowAgg + -> WindowAgg + -> Sort + Sort Key: depname, empno, enroll_date + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(8 rows) + +-- Ensure that we don't push the ORDER BY's sort down for the following cases. +-- See the comment in create_one_window_path() for details of each case that +-- we don't perform the optimization. +-- Case #1: Don't push the ORDER BY's sort down when there's a DISTINCT clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT empno, + depname, + min(salary) OVER (PARTITION BY depname) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno; + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Unique + -> Incremental Sort + Sort Key: depname, empno, (min(salary) OVER (?)), (sum(salary) OVER (?)), (count(*) OVER (?)) + Presorted Key: depname + -> WindowAgg + -> Sort + Sort Key: depname + -> WindowAgg + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary +(11 rows) + +-- Case #2: Don't push the ORDER BY's sort down when there's a LIMIT clause +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY empno) depminsalary, + sum(salary) OVER (PARTITION BY empno) depsalary +FROM empsalary +ORDER BY empno, depname +LIMIT 1; + QUERY PLAN +----------------------------------------------- + Limit + -> Incremental Sort + Sort Key: empno, depname + Presorted Key: empno + -> WindowAgg + -> Sort + Sort Key: empno + -> Seq Scan on empsalary +(8 rows) + +-- Case #3: Don't push the ORDER BY's sort down when the top-level WindowAgg +-- has a runCondition. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT depname, + empno, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) emprn, + sum(salary) OVER (PARTITION BY depname ORDER BY empno) depsalary + FROM empsalary + ORDER BY depname, enroll_date, empno +) e +WHERE emprn < 3; + QUERY PLAN +------------------------------------------------------------------------------ + Subquery Scan on e + -> Incremental Sort + Sort Key: empsalary.depname, empsalary.enroll_date, empsalary.empno + Presorted Key: empsalary.depname, empsalary.enroll_date + -> WindowAgg + Run Condition: (row_number() OVER (?) < 3) + -> Incremental Sort + Sort Key: empsalary.depname, empsalary.enroll_date + Presorted Key: empsalary.depname + -> WindowAgg + -> Sort + Sort Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary +(13 rows) + +-- Try a positive version of case #3 where the runCondition exists but it's +-- not on the final WindowAgg. Ensure the sort is pushed below the WindowAgg +-- in this case. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT depname, + empno, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) emprn, + row_number() OVER (PARTITION BY depname ORDER BY empno) emprn2 + FROM empsalary + ORDER BY depname, enroll_date, empno +) e +WHERE emprn2 < 3; + QUERY PLAN +----------------------------------------------------------------------------------- + Subquery Scan on e + -> WindowAgg + Filter: ((row_number() OVER (?)) < 3) + -> Incremental Sort + Sort Key: empsalary.depname, empsalary.enroll_date, empsalary.empno + Presorted Key: empsalary.depname + -> WindowAgg + Run Condition: (row_number() OVER (?) < 3) + -> Sort + Sort Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary +(11 rows) + +-- Case #4: Don't push the ORDER BY's sort down when the ORDER BY contains +-- WindowFunc results +EXPLAIN (COSTS OFF) +SELECT empno, + row_number() over (PARTITION BY empno) esalary +FROM empsalary +ORDER BY empno, esalary; + QUERY PLAN +-------------------------------------------- + Incremental Sort + Sort Key: empno, (row_number() OVER (?)) + Presorted Key: empno + -> WindowAgg + -> Sort + Sort Key: empno + -> Seq Scan on empsalary +(7 rows) + RESET enable_hashagg; -- Test Sort node reordering EXPLAIN (COSTS OFF) diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index b7bd0a83da..6c7de48a2b 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -1324,6 +1324,120 @@ SELECT DISTINCT FROM empsalary ORDER BY depname, empno; +CREATE INDEX empsalary_depname_idx ON empsalary(depname); +SET enable_seqscan TO off; + +-- Ensure we don't push the ORDER BY sort below the WindowAgg when the input +-- node to the WindowAgg is already sorted correctly. This would just move +-- the sort down and not reduce the number of sorts. +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname) depminsalary +FROM empsalary +ORDER BY depname, empno; + +DROP INDEX empsalary_depname_idx; +RESET enable_seqscan; + +-- Ensure we push the ORDER BY sort below the WindowAgg +EXPLAIN (COSTS OFF) +SELECT empno, + min(salary) OVER (PARTITION BY depname, empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- A more complex permutation of the above. Ensure the ORDER BY sort is +-- pushed down below the WindowAgg when there is a PARTITION BY and ORDER BY +-- clause in the WindowFunc +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- Try more complex permutations of pushing the ORDER BY's sort below the +-- WindowAgg. With multiple WindowAggs sharing the same sort, ensure we push +-- the sort to the correct level. +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- As above, but swap the order of the WindowFuncs to ensure their order is +-- not a factor in the plan choice. +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + count(*) OVER (ORDER BY enroll_date DESC) c, + sum(salary) OVER (PARTITION BY depname) depsalary, + min(salary) OVER (PARTITION BY depname ORDER BY empno) depminsalary +FROM empsalary +ORDER BY depname, empno, enroll_date; + +-- Ensure that we don't push the ORDER BY's sort down for the following cases. +-- See the comment in create_one_window_path() for details of each case that +-- we don't perform the optimization. + +-- Case #1: Don't push the ORDER BY's sort down when there's a DISTINCT clause +EXPLAIN (COSTS OFF) +SELECT DISTINCT empno, + depname, + min(salary) OVER (PARTITION BY depname) depminsalary, + sum(salary) OVER (PARTITION BY depname) depsalary, + count(*) OVER (ORDER BY enroll_date DESC) c +FROM empsalary +ORDER BY depname, empno; + +-- Case #2: Don't push the ORDER BY's sort down when there's a LIMIT clause +EXPLAIN (COSTS OFF) +SELECT empno, + depname, + min(salary) OVER (PARTITION BY empno) depminsalary, + sum(salary) OVER (PARTITION BY empno) depsalary +FROM empsalary +ORDER BY empno, depname +LIMIT 1; + +-- Case #3: Don't push the ORDER BY's sort down when the top-level WindowAgg +-- has a runCondition. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT depname, + empno, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) emprn, + sum(salary) OVER (PARTITION BY depname ORDER BY empno) depsalary + FROM empsalary + ORDER BY depname, enroll_date, empno +) e +WHERE emprn < 3; + +-- Try a positive version of case #3 where the runCondition exists but it's +-- not on the final WindowAgg. Ensure the sort is pushed below the WindowAgg +-- in this case. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT depname, + empno, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) emprn, + row_number() OVER (PARTITION BY depname ORDER BY empno) emprn2 + FROM empsalary + ORDER BY depname, enroll_date, empno +) e +WHERE emprn2 < 3; + +-- Case #4: Don't push the ORDER BY's sort down when the ORDER BY contains +-- WindowFunc results +EXPLAIN (COSTS OFF) +SELECT empno, + row_number() over (PARTITION BY empno) esalary +FROM empsalary +ORDER BY empno, esalary; + RESET enable_hashagg; -- Test Sort node reordering