From fbc5e6709550f485a2153dda97ef805700717f23 Mon Sep 17 00:00:00 2001 From: Andrey Lepikhov Date: Fri, 24 Dec 2021 13:01:48 +0500 Subject: [PATCH] My fixes. --- src/backend/optimizer/path/costsize.c | 33 ++++++++++++--------------- src/backend/optimizer/path/pathkeys.c | 9 ++++---- src/backend/optimizer/plan/planner.c | 8 ------- 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index afb8ba54ea..70af9c91d5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1845,19 +1845,17 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) * groups in the current pathkey prefix and the new pathkey), and the comparison * costs (which is data type specific). * - * Estimation of the number of comparisons is based on ideas from two sources: + * Estimation of the number of comparisons is based on ideas from: * - * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/] + * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002 + * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf] * - * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild, - * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906] - * - * In term of that paper, let N - number of tuples, Xi - number of tuples with - * key Ki, then the estimate of number of comparisons is: + * In term of that paper, let N - number of tuples, Xi - number of identical + * tuples with value Ki, then the estimate of number of comparisons is: * * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) * - * In our case all Xi are the same because now we don't have any estimation of + * We assume all Xi the same because now we don't have any estimation of * group sizes, we have only know the estimate of number of groups (distinct * values). In that case, formula becomes: * @@ -1865,7 +1863,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) * * For multi-column sorts we need to estimate the number of comparisons for * each individual column - for example with columns (c1, c2, ..., ck) we - * can estimate that number of comparions on ck is roughly + * can estimate that number of comparisons on ck is roughly * * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) * @@ -1874,10 +1872,10 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) * * N * sum( Fk * log(Gk) ) * - * Note: We also consider column witdth, not just the comparator cost. + * Note: We also consider column width, not just the comparator cost. * * NOTE: some callers currently pass NIL for pathkeys because they - * can't conveniently supply the sort keys. In this case, it will fallback to + * can't conveniently supply the sort keys. In this case, it will fallback to * simple comparison cost estimate. */ static Cost @@ -1925,13 +1923,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, */ foreach(lc, pathkeys) { - PathKey *pathkey = (PathKey*)lfirst(lc); + PathKey *pathkey = (PathKey*) lfirst(lc); EquivalenceMember *em; double nGroups, correctedNGroups; /* - * We believe than equivalence members aren't very different, so, to + * We believe that equivalence members aren't very different, so, to * estimate cost we take just first member */ em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members); @@ -1964,7 +1962,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, totalFuncCost += funcCost; - /* remeber if we have a fake var in pathkeys */ + /* Remember if we have a fake var in pathkeys */ has_fake_var |= is_fake_var(em->em_expr); pathkeyExprs = lappend(pathkeyExprs, em->em_expr); @@ -1974,7 +1972,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, */ if (has_fake_var == false) /* - * Recursively compute number of group in group from previous step + * Recursively compute number of groups in a group from previous step */ nGroups = estimate_num_groups_incremental(root, pathkeyExprs, tuplesPerPrevGroup, NULL, NULL, @@ -1992,8 +1990,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * * XXX What's the logic of the following formula? */ - nGroups = ceil(2.0 + sqrt(tuples) * - list_length(pathkeyExprs) / list_length(pathkeys)); + nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); else nGroups = tuples; @@ -2033,7 +2030,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, /* * We could skip all following columns for cost estimation, because we - * believe that tuples are unique by set ot previous columns + * believe that tuples are unique by the set of previous columns */ if (tuplesPerPrevGroup <= 1.0) break; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 457018c2a4..f1919823da 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -544,9 +544,10 @@ PathkeyMutatorNext(PathkeyMutatorState *state) return state->elemsList; } -typedef struct { - Cost cost; - PathKey *pathkey; +typedef struct PathkeySortCost +{ + Cost cost; + PathKey *pathkey; } PathkeySortCost; static int @@ -793,7 +794,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows, * more complex logic to decide the ordering. * * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's - * more a complement, because it allows benefinting from incremental sort + * more a complement, because it allows benefiting from incremental sort * as much as possible. * * XXX This does nothing if (n_preordered == 0). We shouldn't create the diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index fac5822e5b..a7e36ed76e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6158,7 +6158,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { ListCell *lc2; Path *path = (Path *) lfirst(lc); - Path *path_save = path; Path *path_original = path; List *pathkey_orderings = NIL; @@ -6182,9 +6181,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, int presorted_keys = 0; PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* restore the path (we replace it in the loop) */ - path = path_save; - is_sorted = pathkeys_count_contained_in(info->pathkeys, path->pathkeys, &presorted_keys); @@ -6653,7 +6649,6 @@ create_partial_grouping_paths(PlannerInfo *root, { ListCell *lc2; Path *path = (Path *) lfirst(lc); - Path *path_save = path; List *pathkey_orderings = NIL; @@ -6676,9 +6671,6 @@ create_partial_grouping_paths(PlannerInfo *root, int presorted_keys = 0; PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* restore the path (we replace it in the loop) */ - path = path_save; - is_sorted = pathkeys_count_contained_in(info->pathkeys, path->pathkeys, &presorted_keys); -- 2.25.1