From d5fd0f8f981d9e457320c1007f21f2b9b74aab9e Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Thu, 10 Feb 2022 13:51:51 +0500 Subject: [PATCH 2/2] Use default restriction for number of groups. --- src/backend/optimizer/path/costsize.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 68a32740d7..b9e975df10 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1756,8 +1756,8 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) /* * is_fake_var - * Workaround for generate_append_tlist() which generates fake Vars with - * varno == 0, that will cause a fail of estimate_num_group() call + * Workaround for generate_append_tlist() which generates fake Vars for the + * case of "varno 0", which will cause the estimate_num_group() call to fail * * XXX Ummm, why would estimate_num_group fail with this? */ @@ -1978,21 +1978,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, tuplesPerPrevGroup, NULL, NULL, &cache_varinfos, list_length(pathkeyExprs) - 1); - else if (tuples > 4.0) + else /* - * Use geometric mean as estimation if there is no any stats. - * Don't use DEFAULT_NUM_DISTINCT because it used for only one - * column while here we try to estimate number of groups over - * set of columns. - * - * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to - * limit the calculated values, somehow? - * - * XXX What's the logic of the following formula? + * In case of full uncertainty, use the default defensive approach. It + * means that all permutations of such vars are equivalent. + * Also, see comments for the cost_incremental_sort routine. 
*/ - nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); - else - nGroups = tuples; + nGroups = Min(tuplesPerPrevGroup, DEFAULT_NUM_DISTINCT); /* * Presorted keys aren't participated in comparison but still checked -- 2.25.1