From 976fdd2f13eaa14e356f0f3a003c6503d907b111 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 18 Jul 2019 12:28:16 +0200 Subject: [PATCH] Use column collation for extended statistics The current extended statistics code was a bit confused which collation to use. When building the statistics, the collations defined as default for the data types were used (since commit 5e0928005). The MCV code was however using the column collations for MCV serialization, and then DEFAULT_COLLATION_OID when computing estimates. So overall the code was using all three possible options, inconsistently. This uses the column colation everywhere - this makes it consistent with what 5e0928005 did for regular stats. We however do not track the collations in a catalog, because we can derive them from column-level information. This may need to change in the future, e.g. after allowing statistics on expressions. Discussion: https://postgr.es/m/8736jdhbhc.fsf%40ansel.ydns.eu Backpatch-to: 12 --- src/backend/statistics/dependencies.c | 2 +- src/backend/statistics/mcv.c | 10 +++++----- src/backend/statistics/mvdistinct.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 66c38ce2bc..585cad2ad9 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -273,7 +273,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency, colstat->attrtypid); /* prepare the sort function for this dimension */ - multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); } /* diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index e5a4e86c5d..ef66029caa 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -366,7 +366,7 @@ build_mss(VacAttrStats **stats, int numattrs) elog(ERROR, "cache lookup failed for ordering operator for type %u", colstat->attrtypid); - multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); } return mss; @@ -686,7 +686,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) /* sort and deduplicate the data */ ssup[dim].ssup_cxt = CurrentMemoryContext; - ssup[dim].ssup_collation = DEFAULT_COLLATION_OID; + ssup[dim].ssup_collation = stats[dim]->attrcollid; ssup[dim].ssup_nulls_first = false; PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); @@ -1640,7 +1640,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, * or (const op var). */ match = DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, cst->constvalue, item->values[idx])); @@ -1658,12 +1658,12 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, */ if (isgt) match = DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, cst->constvalue, item->values[idx])); else match = DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, item->values[idx], cst->constvalue)); diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 9ebf183d90..228fa2684f 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -477,7 +477,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, colstat->attrtypid); /* prepare the sort function for this dimension */ - multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); /* accumulate all the data for this dimension into the arrays */ for (j = 0; j < numrows; j++) -- 2.21.0