diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index b5d6c977ee..5f0ead2db8 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -1890,11 +1890,13 @@ find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, * Since we assume here that a sort is required, there is no particular use * in matching any available ordering of the outerrel. (joinpath.c has an * entirely separate code path for considering sort-free mergejoins.) Rather, - * it's interesting to try to match the requested query_pathkeys so that a - * second output sort may be avoided; and failing that, we try to list "more - * popular" keys (those with the most unmatched EquivalenceClass peers) - * earlier, in hopes of making the resulting ordering useful for as many - * higher-level mergejoins as possible. + * it's interesting to try to match, or match a prefix of the requested + * query_pathkeys so that a second output sort may be avoided or an + * incremental sort may be done instead. We can get away with just a prefix + * of the query_pathkeys when that prefix covers the entire join condition. + * Failing that, we try to list "more popular" keys (those with the most + * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting + * ordering useful for as many higher-level mergejoins as possible. */ List * select_outer_pathkeys_for_merge(PlannerInfo *root, @@ -1964,11 +1966,16 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, /* * Find out if we have all the ECs mentioned in query_pathkeys; if so we - * can generate a sort order that's also useful for final output. There is - * no percentage in a partial match, though, so we have to have 'em all. + * can generate a sort order that's also useful for final output. If we + * only have a prefix of the query_pathkeys, and that prefix is the entire + * join condition, then it's useful to use the prefix as the pathkeys as + * this increases the chances that an incremental sort will be able to be + * used by the upper planner. */ if (root->query_pathkeys) { + int matches = 0; + foreach(lc, root->query_pathkeys) { PathKey *query_pathkey = (PathKey *) lfirst(lc); @@ -1981,6 +1988,8 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, } if (j >= necs) break; /* didn't find match */ + + matches++; } /* if we got to the end of the list, we have them all */ if (lc == NULL) @@ -2003,6 +2012,23 @@ select_outer_pathkeys_for_merge(PlannerInfo *root, } } } + + /* + * If we didn't find a full match, but matched all of the join clauses + * then we'll make use of these as partially sorted input is better + * than nothing for the upper planner as it may lead to incremental + * sorts instead of full sorts. + */ + else if (matches == nClauses) + { + pathkeys = list_copy_head(root->query_pathkeys, matches); + + /* we have all of the join pathkeys, so nothing more to do */ + pfree(ecs); + pfree(scores); + + return pathkeys; + } } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index e1d9d971d6..e83c552159 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2437,6 +2437,37 @@ select count(*) from 10000 (1 row) +set enable_hashjoin = 0; +set enable_nestloop = 0; +set enable_hashagg = 0; +-- +-- check that we use the pathkeys from a prefix of the group by / order by +-- clause for the join pathkeys when that prefix covers all join quals. We +-- expect this to lead to an incremental sort for the group by / order by. +-- +explain (costs off) +select x.thousand, x.twothousand, count(*) +from tenk1 x inner join tenk1 y on x.thousand = y.thousand +group by x.thousand, x.twothousand +order by x.thousand desc, x.twothousand; + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate + Group Key: x.thousand, x.twothousand + -> Incremental Sort + Sort Key: x.thousand DESC, x.twothousand + Presorted Key: x.thousand + -> Merge Join + Merge Cond: (y.thousand = x.thousand) + -> Index Only Scan Backward using tenk1_thous_tenthous on tenk1 y + -> Sort + Sort Key: x.thousand DESC + -> Seq Scan on tenk1 x +(11 rows) + +reset enable_hashagg; +reset enable_nestloop; +reset enable_hashjoin; -- -- Clean up -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b5f41c4955..30c47fa957 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -472,6 +472,24 @@ select count(*) from (select * from tenk1 y order by y.unique2) y on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = y.unique2; +set enable_hashjoin = 0; +set enable_nestloop = 0; +set enable_hashagg = 0; + +-- +-- check that we use the pathkeys from a prefix of the group by / order by +-- clause for the join pathkeys when that prefix covers all join quals. We +-- expect this to lead to an incremental sort for the group by / order by. +-- +explain (costs off) +select x.thousand, x.twothousand, count(*) +from tenk1 x inner join tenk1 y on x.thousand = y.thousand +group by x.thousand, x.twothousand +order by x.thousand desc, x.twothousand; + +reset enable_hashagg; +reset enable_nestloop; +reset enable_hashjoin; -- -- Clean up