Index: src/backend/optimizer/path/costsize.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v retrieving revision 1.189 diff -c -r1.189 costsize.c *** src/backend/optimizer/path/costsize.c 15 Nov 2007 22:25:15 -0000 1.189 --- src/backend/optimizer/path/costsize.c 6 Dec 2007 23:46:32 -0000 *************** *** 1372,1383 **** double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, ! inner_rows; double mergejointuples, rescannedtuples; double rescanratio; ! Selectivity outerscansel, ! innerscansel; Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ --- 1372,1387 ---- double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, ! inner_rows, ! outer_skip_rows, ! inner_skip_rows; double mergejointuples, rescannedtuples; double rescanratio; ! Selectivity outerstartsel, ! outerendsel, ! innerstartsel, ! innerendsel; Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ *************** *** 1444,1453 **** * A merge join will stop as soon as it exhausts either input stream * (unless it's an outer join, in which case the outer side has to be * scanned all the way anyway). Estimate fraction of the left and right ! * inputs that will actually need to be scanned. We use only the first ! * (most significant) merge clause for this purpose. Since ! * mergejoinscansel() is a fairly expensive computation, we cache the ! * results in the merge clause RestrictInfo. */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { --- 1448,1459 ---- * A merge join will stop as soon as it exhausts either input stream * (unless it's an outer join, in which case the outer side has to be * scanned all the way anyway). Estimate fraction of the left and right ! * inputs that will actually need to be scanned. Likewise, we can ! * estimate the number of rows that will be skipped before the first ! * join pair is found, which should be factored into startup cost. ! * We use only the first (most significant) merge clause for this purpose. ! * Since mergejoinscansel() is a fairly expensive computation, we cache ! * the results in the merge clause RestrictInfo. */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { *************** *** 1478,1514 **** outer_path->parent->relids)) { /* left side of clause is outer */ ! outerscansel = cache->leftscansel; ! innerscansel = cache->rightscansel; } else { /* left side of clause is inner */ ! outerscansel = cache->rightscansel; ! innerscansel = cache->leftscansel; } if (path->jpath.jointype == JOIN_LEFT) ! outerscansel = 1.0; else if (path->jpath.jointype == JOIN_RIGHT) ! innerscansel = 1.0; } else { /* cope with clauseless or full mergejoin */ ! outerscansel = innerscansel = 1.0; } ! /* convert selectivity to row count; must scan at least one row */ ! outer_rows = clamp_row_est(outer_path_rows * outerscansel); ! inner_rows = clamp_row_est(inner_path_rows * innerscansel); /* * Readjust scan selectivities to account for above rounding. This is * normally an insignificant effect, but when there are only a few rows in * the inputs, failing to do this makes for a large percentage error. */ ! outerscansel = outer_rows / outer_path_rows; ! innerscansel = inner_rows / inner_path_rows; /* cost of source data */ --- 1484,1544 ---- outer_path->parent->relids)) { /* left side of clause is outer */ ! outerstartsel = cache->leftstartsel; ! outerendsel = cache->leftendsel; ! innerstartsel = cache->rightstartsel; ! innerendsel = cache->rightendsel; } else { /* left side of clause is inner */ ! outerstartsel = cache->rightstartsel; ! outerendsel = cache->rightendsel; ! innerstartsel = cache->leftstartsel; ! innerendsel = cache->leftendsel; } if (path->jpath.jointype == JOIN_LEFT) ! { ! outerstartsel = 0.0; ! outerendsel = 1.0; ! } else if (path->jpath.jointype == JOIN_RIGHT) ! { ! innerstartsel = 0.0; ! innerendsel = 1.0; ! } } else { /* cope with clauseless or full mergejoin */ ! outerstartsel = innerstartsel = 0.0; ! outerendsel = innerendsel = 1.0; } ! /* ! * Convert selectivities to row counts. We force outer_rows and ! * inner_rows to be at least 1, but the skip_rows estimates can be zero. ! */ ! outer_skip_rows = rint(outer_path_rows * outerstartsel); ! inner_skip_rows = rint(inner_path_rows * innerstartsel); ! outer_rows = clamp_row_est(outer_path_rows * outerendsel); ! inner_rows = clamp_row_est(inner_path_rows * innerendsel); ! ! Assert(outer_skip_rows <= outer_rows); ! Assert(inner_skip_rows <= inner_rows); /* * Readjust scan selectivities to account for above rounding. This is * normally an insignificant effect, but when there are only a few rows in * the inputs, failing to do this makes for a large percentage error. */ ! outerstartsel = outer_skip_rows / outer_path_rows; ! innerstartsel = inner_skip_rows / inner_path_rows; ! outerendsel = outer_rows / outer_path_rows; ! innerendsel = inner_rows / inner_path_rows; ! ! Assert(outerstartsel <= outerendsel); ! Assert(innerstartsel <= innerendsel); /* cost of source data */ *************** *** 1522,1535 **** outer_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) ! * outerscansel; } else { startup_cost += outer_path->startup_cost; run_cost += (outer_path->total_cost - outer_path->startup_cost) ! * outerscansel; } if (innersortkeys) /* do we need to sort inner? */ --- 1552,1569 ---- outer_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; + startup_cost += (sort_path.total_cost - sort_path.startup_cost) + * outerstartsel; run_cost += (sort_path.total_cost - sort_path.startup_cost) ! * (outerendsel - outerstartsel); } else { startup_cost += outer_path->startup_cost; + startup_cost += (outer_path->total_cost - outer_path->startup_cost) + * outerstartsel; run_cost += (outer_path->total_cost - outer_path->startup_cost) ! * (outerendsel - outerstartsel); } if (innersortkeys) /* do we need to sort inner? */ *************** *** 1542,1555 **** inner_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) ! * innerscansel * rescanratio; } else { startup_cost += inner_path->startup_cost; run_cost += (inner_path->total_cost - inner_path->startup_cost) ! * innerscansel * rescanratio; } /* CPU costs */ --- 1576,1593 ---- inner_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; + startup_cost += (sort_path.total_cost - sort_path.startup_cost) + * innerstartsel * rescanratio; run_cost += (sort_path.total_cost - sort_path.startup_cost) ! * (innerendsel - innerstartsel) * rescanratio; } else { startup_cost += inner_path->startup_cost; + startup_cost += (inner_path->total_cost - inner_path->startup_cost) + * innerstartsel * rescanratio; run_cost += (inner_path->total_cost - inner_path->startup_cost) ! * (innerendsel - innerstartsel) * rescanratio; } /* CPU costs */ *************** *** 1571,1578 **** * joininfactor. */ startup_cost += merge_qual_cost.startup; run_cost += merge_qual_cost.per_tuple * ! (outer_rows + inner_rows * rescanratio); /* * For each tuple that gets through the mergejoin proper, we charge --- 1609,1619 ---- * joininfactor. */ startup_cost += merge_qual_cost.startup; + startup_cost += merge_qual_cost.per_tuple * + (outer_skip_rows + inner_skip_rows * rescanratio); run_cost += merge_qual_cost.per_tuple * ! ((outer_rows - outer_skip_rows) + ! (inner_rows - inner_skip_rows) * rescanratio); /* * For each tuple that gets through the mergejoin proper, we charge *************** *** 1597,1604 **** { MergeScanSelCache *cache; ListCell *lc; ! Selectivity leftscansel, ! rightscansel; MemoryContext oldcontext; /* Do we have this result already? */ --- 1638,1647 ---- { MergeScanSelCache *cache; ListCell *lc; ! Selectivity leftstartsel, ! leftendsel, ! rightstartsel, ! rightendsel; MemoryContext oldcontext; /* Do we have this result already? */ *************** *** 1617,1624 **** pathkey->pk_opfamily, pathkey->pk_strategy, pathkey->pk_nulls_first, ! &leftscansel, ! &rightscansel); /* Cache the result in suitably long-lived workspace */ oldcontext = MemoryContextSwitchTo(root->planner_cxt); --- 1660,1669 ---- pathkey->pk_opfamily, pathkey->pk_strategy, pathkey->pk_nulls_first, ! &leftstartsel, ! &leftendsel, ! &rightstartsel, ! &rightendsel); /* Cache the result in suitably long-lived workspace */ oldcontext = MemoryContextSwitchTo(root->planner_cxt); *************** *** 1627,1634 **** cache->opfamily = pathkey->pk_opfamily; cache->strategy = pathkey->pk_strategy; cache->nulls_first = pathkey->pk_nulls_first; ! cache->leftscansel = leftscansel; ! cache->rightscansel = rightscansel; rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache); --- 1672,1681 ---- cache->opfamily = pathkey->pk_opfamily; cache->strategy = pathkey->pk_strategy; cache->nulls_first = pathkey->pk_nulls_first; ! cache->leftstartsel = leftstartsel; ! cache->leftendsel = leftendsel; ! cache->rightstartsel = rightstartsel; ! cache->rightendsel = rightendsel; rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache); Index: src/backend/utils/adt/selfuncs.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v retrieving revision 1.241 diff -c -r1.241 selfuncs.c *** src/backend/utils/adt/selfuncs.c 15 Nov 2007 22:25:16 -0000 1.241 --- src/backend/utils/adt/selfuncs.c 6 Dec 2007 23:46:32 -0000 *************** *** 128,135 **** int rangelo, int rangehi); static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); ! static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata, ! Oid sortop, Datum *max); static Selectivity prefix_selectivity(VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); --- 128,135 ---- int rangelo, int rangehi); static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); ! static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, ! Oid sortop, Datum *min, Datum *max); static Selectivity prefix_selectivity(VariableStatData *vardata, Oid vartype, Oid opfamily, Const *prefixcon); static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype); *************** *** 2172,2189 **** * we can estimate how much of the input will actually be read. This * can have a considerable impact on the cost when using indexscans. * * clause should be a clause already known to be mergejoinable. opfamily, * strategy, and nulls_first specify the sort ordering being used. * ! * *leftscan is set to the fraction of the left-hand variable expected ! * to be scanned (0 to 1), and similarly *rightscan for the right-hand ! * variable. */ void mergejoinscansel(PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, ! Selectivity *leftscan, ! Selectivity *rightscan) { Node *left, *right; --- 2172,2195 ---- * we can estimate how much of the input will actually be read. This * can have a considerable impact on the cost when using indexscans. * + * Also, we can estimate how much of each input has to be read before the + * first join pair is found, which will affect the join's startup time. + * * clause should be a clause already known to be mergejoinable. opfamily, * strategy, and nulls_first specify the sort ordering being used. * ! * The outputs are: ! * *leftstart is set to the fraction of the left-hand variable expected ! * to be scanned before the first join pair is found (0 to 1). ! * *leftend is set to the fraction of the left-hand variable expected ! * to be scanned before the join terminates (0 to 1). ! * *rightstart, *rightend similarly for the right-hand variable. */ void mergejoinscansel(PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, ! Selectivity *leftstart, Selectivity *leftend, ! Selectivity *rightstart, Selectivity *rightend) { Node *left, *right; *************** *** 2196,2209 **** Oid opno, lsortop, rsortop, leop, revleop; ! Datum leftmax, rightmax; double selec; /* Set default results if we can't figure anything out. */ ! *leftscan = *rightscan = 1.0; /* Deconstruct the merge clause */ if (!is_opclause(clause)) --- 2202,2224 ---- Oid opno, lsortop, rsortop, + lstatop, + rstatop, + ltop, leop, + revltop, revleop; ! bool isgt; ! Datum leftmin, ! leftmax, ! rightmin, rightmax; double selec; /* Set default results if we can't figure anything out. */ ! /* XXX should default "start" fraction be a bit more than 0? */ ! *leftstart = *rightstart = 0.0; ! *leftend = *rightend = 1.0; /* Deconstruct the merge clause */ if (!is_opclause(clause)) *************** *** 2229,2258 **** /* * Look up the various operators we need. If we don't find them all, it ! * probably means the opfamily is broken, but we cope anyway. */ switch (strategy) { case BTLessStrategyNumber: ! lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, ! BTLessStrategyNumber); ! rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype, ! BTLessStrategyNumber); ! leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, ! BTLessEqualStrategyNumber); ! revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype, ! BTLessEqualStrategyNumber); break; case BTGreaterStrategyNumber: /* descending-order case */ ! lsortop = get_opfamily_member(opfamily, op_lefttype, op_lefttype, ! BTGreaterStrategyNumber); ! rsortop = get_opfamily_member(opfamily, op_righttype, op_righttype, ! BTGreaterStrategyNumber); ! leop = get_opfamily_member(opfamily, op_lefttype, op_righttype, ! BTGreaterEqualStrategyNumber); ! revleop = get_opfamily_member(opfamily, op_righttype, op_lefttype, ! BTGreaterEqualStrategyNumber); break; default: goto fail; /* shouldn't get here */ --- 2244,2346 ---- /* * Look up the various operators we need. If we don't find them all, it ! * probably means the opfamily is broken, but we just fail silently. ! * ! * Note: we expect that pg_statistic histograms will be sorted by the ! * '<' operator, regardless of which sort direction we are considering. */ switch (strategy) { case BTLessStrategyNumber: ! isgt = false; ! if (op_lefttype == op_righttype) ! { ! /* easy case */ ! ltop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTLessStrategyNumber); ! leop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTLessEqualStrategyNumber); ! lsortop = ltop; ! rsortop = ltop; ! lstatop = lsortop; ! rstatop = rsortop; ! revltop = ltop; ! revleop = leop; ! } ! else ! { ! ltop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTLessStrategyNumber); ! leop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTLessEqualStrategyNumber); ! lsortop = get_opfamily_member(opfamily, ! op_lefttype, op_lefttype, ! BTLessStrategyNumber); ! rsortop = get_opfamily_member(opfamily, ! op_righttype, op_righttype, ! BTLessStrategyNumber); ! lstatop = lsortop; ! rstatop = rsortop; ! revltop = get_opfamily_member(opfamily, ! op_righttype, op_lefttype, ! BTLessStrategyNumber); ! revleop = get_opfamily_member(opfamily, ! op_righttype, op_lefttype, ! BTLessEqualStrategyNumber); ! } break; case BTGreaterStrategyNumber: /* descending-order case */ ! isgt = true; ! if (op_lefttype == op_righttype) ! { ! /* easy case */ ! ltop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTGreaterStrategyNumber); ! leop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTGreaterEqualStrategyNumber); ! lsortop = ltop; ! rsortop = ltop; ! lstatop = get_opfamily_member(opfamily, ! op_lefttype, op_lefttype, ! BTLessStrategyNumber); ! rstatop = lstatop; ! revltop = ltop; ! revleop = leop; ! } ! else ! { ! ltop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTGreaterStrategyNumber); ! leop = get_opfamily_member(opfamily, ! op_lefttype, op_righttype, ! BTGreaterEqualStrategyNumber); ! lsortop = get_opfamily_member(opfamily, ! op_lefttype, op_lefttype, ! BTGreaterStrategyNumber); ! rsortop = get_opfamily_member(opfamily, ! op_righttype, op_righttype, ! BTGreaterStrategyNumber); ! lstatop = get_opfamily_member(opfamily, ! op_lefttype, op_lefttype, ! BTLessStrategyNumber); ! rstatop = get_opfamily_member(opfamily, ! op_righttype, op_righttype, ! BTLessStrategyNumber); ! revltop = get_opfamily_member(opfamily, ! op_righttype, op_lefttype, ! BTGreaterStrategyNumber); ! revleop = get_opfamily_member(opfamily, ! op_righttype, op_lefttype, ! BTGreaterEqualStrategyNumber); ! } break; default: goto fail; /* shouldn't get here */ *************** *** 2260,2325 **** if (!OidIsValid(lsortop) || !OidIsValid(rsortop) || !OidIsValid(leop) || !OidIsValid(revleop)) goto fail; /* insufficient info in catalogs */ ! /* Try to get maximum values of both inputs */ ! if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax)) ! goto fail; /* no max available from stats */ ! ! if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax)) ! goto fail; /* no max available from stats */ /* * Now, the fraction of the left variable that will be scanned is the * fraction that's <= the right-side maximum value. But only believe ! * non-default estimates, else stick with our 1.0. Also, if the sort ! * order is nulls-first, we're going to have to read over any nulls too. */ ! selec = scalarineqsel(root, leop, false, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) ! { ! if (nulls_first && HeapTupleIsValid(leftvar.statsTuple)) ! { ! Form_pg_statistic stats; ! ! stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); ! selec += stats->stanullfrac; ! CLAMP_PROBABILITY(selec); ! } ! *leftscan = selec; ! } /* And similarly for the right variable. */ ! selec = scalarineqsel(root, revleop, false, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) { ! if (nulls_first && HeapTupleIsValid(rightvar.statsTuple)) ! { ! Form_pg_statistic stats; stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); ! selec += stats->stanullfrac; ! CLAMP_PROBABILITY(selec); } - *rightscan = selec; } ! /* ! * Only one of the two fractions can really be less than 1.0; believe the ! * smaller estimate and reset the other one to exactly 1.0. If we get ! * exactly equal estimates (as can easily happen with self-joins), believe ! * neither. ! */ ! if (*leftscan > *rightscan) ! *leftscan = 1.0; ! else if (*leftscan < *rightscan) ! *rightscan = 1.0; ! else ! *leftscan = *rightscan = 1.0; fail: ReleaseVariableStats(leftvar); --- 2348,2480 ---- if (!OidIsValid(lsortop) || !OidIsValid(rsortop) || + !OidIsValid(lstatop) || + !OidIsValid(rstatop) || + !OidIsValid(ltop) || !OidIsValid(leop) || + !OidIsValid(revltop) || !OidIsValid(revleop)) goto fail; /* insufficient info in catalogs */ ! /* Try to get ranges of both inputs */ ! if (!isgt) ! { ! if (!get_variable_range(root, &leftvar, lstatop, ! &leftmin, &leftmax)) ! goto fail; /* no range available from stats */ ! if (!get_variable_range(root, &rightvar, rstatop, ! &rightmin, &rightmax)) ! goto fail; /* no range available from stats */ ! } ! else ! { ! /* need to swap the max and min */ ! if (!get_variable_range(root, &leftvar, lstatop, ! &leftmax, &leftmin)) ! goto fail; /* no range available from stats */ ! if (!get_variable_range(root, &rightvar, rstatop, ! &rightmax, &rightmin)) ! goto fail; /* no range available from stats */ ! } /* * Now, the fraction of the left variable that will be scanned is the * fraction that's <= the right-side maximum value. But only believe ! * non-default estimates, else stick with our 1.0. */ ! selec = scalarineqsel(root, leop, isgt, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) ! *leftend = selec; /* And similarly for the right variable. */ ! selec = scalarineqsel(root, revleop, isgt, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) + *rightend = selec; + + /* + * Only one of the two "end" fractions can really be less than 1.0; + * believe the smaller estimate and reset the other one to exactly 1.0. + * If we get exactly equal estimates (as can easily happen with + * self-joins), believe neither. + */ + if (*leftend > *rightend) + *leftend = 1.0; + else if (*leftend < *rightend) + *rightend = 1.0; + else + *leftend = *rightend = 1.0; + + /* + * Also, the fraction of the left variable that will be scanned before + * the first join pair is found is the fraction that's < the right-side + * minimum value. But only believe non-default estimates, else stick with + * our own default. + */ + selec = scalarineqsel(root, ltop, isgt, &leftvar, + rightmin, op_righttype); + if (selec != DEFAULT_INEQ_SEL) + *leftstart = selec; + + /* And similarly for the right variable. */ + selec = scalarineqsel(root, revltop, isgt, &rightvar, + leftmin, op_lefttype); + if (selec != DEFAULT_INEQ_SEL) + *rightstart = selec; + + /* + * Only one of the two "start" fractions can really be more than zero; + * believe the larger estimate and reset the other one to exactly 0.0. + * If we get exactly equal estimates (as can easily happen with + * self-joins), believe neither. + */ + if (*leftstart < *rightstart) + *leftstart = 0.0; + else if (*leftstart > *rightstart) + *rightstart = 0.0; + else + *leftstart = *rightstart = 0.0; + + /* + * If the sort order is nulls-first, we're going to have to skip over any + * nulls too. These would not have been counted by scalarineqsel, and + * we can safely add in this fraction regardless of whether we believe + * scalarineqsel's results or not. But be sure to clamp the sum to 1.0! + */ + if (nulls_first) { ! Form_pg_statistic stats; + if (HeapTupleIsValid(leftvar.statsTuple)) + { + stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); + *leftstart += stats->stanullfrac; + CLAMP_PROBABILITY(*leftstart); + *leftend += stats->stanullfrac; + CLAMP_PROBABILITY(*leftend); + } + if (HeapTupleIsValid(rightvar.statsTuple)) + { stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); ! *rightstart += stats->stanullfrac; ! CLAMP_PROBABILITY(*rightstart); ! *rightend += stats->stanullfrac; ! CLAMP_PROBABILITY(*rightend); } } ! /* Disbelieve start >= end, just in case that can happen */ ! if (*leftstart >= *leftend) ! { ! *leftstart = 0.0; ! *leftend = 1.0; ! } ! if (*rightstart >= *rightend) ! { ! *rightstart = 0.0; ! *rightend = 1.0; ! } fail: ReleaseVariableStats(leftvar); *************** *** 3778,3797 **** } /* ! * get_variable_maximum ! * Estimate the maximum value of the specified variable. ! * If successful, store value in *max and return TRUE. * If no data available, return FALSE. * ! * sortop is the "<" comparison operator to use. (To extract the ! * minimum instead of the maximum, just pass the ">" operator instead.) */ static bool ! get_variable_maximum(PlannerInfo *root, VariableStatData *vardata, ! Oid sortop, Datum *max) { Datum tmax = 0; ! bool have_max = false; Form_pg_statistic stats; int16 typLen; bool typByVal; --- 3933,3953 ---- } /* ! * get_variable_range ! * Estimate the minimum and maximum value of the specified variable. ! * If successful, store values in *min and *max, and return TRUE. * If no data available, return FALSE. * ! * sortop is the "<" comparison operator to use. This should generally ! * be "<" not ">", as only the former is likely to be found in pg_statistic. */ static bool ! get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, ! Datum *min, Datum *max) { + Datum tmin = 0; Datum tmax = 0; ! bool have_data = false; Form_pg_statistic stats; int16 typLen; bool typByVal; *************** *** 3809,3815 **** get_typlenbyval(vardata->atttype, &typLen, &typByVal); /* ! * If there is a histogram, grab the last or first value as appropriate. * * If there is a histogram that is sorted with some other operator than * the one we want, fail --- this suggests that there is data we can't --- 3965,3971 ---- get_typlenbyval(vardata->atttype, &typLen, &typByVal); /* ! * If there is a histogram, grab the first and last values. * * If there is a histogram that is sorted with some other operator than * the one we want, fail --- this suggests that there is data we can't *************** *** 3823,3864 **** { if (nvalues > 0) { tmax = datumCopy(values[nvalues - 1], typByVal, typLen); ! have_max = true; } free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } ! else { ! Oid rsortop = get_commutator(sortop); ! ! if (OidIsValid(rsortop) && ! get_attstatsslot(vardata->statsTuple, ! vardata->atttype, vardata->atttypmod, ! STATISTIC_KIND_HISTOGRAM, rsortop, ! &values, &nvalues, ! NULL, NULL)) ! { ! if (nvalues > 0) ! { ! tmax = datumCopy(values[0], typByVal, typLen); ! have_max = true; ! } ! free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); ! } ! else if (get_attstatsslot(vardata->statsTuple, ! vardata->atttype, vardata->atttypmod, ! STATISTIC_KIND_HISTOGRAM, InvalidOid, ! &values, &nvalues, ! NULL, NULL)) ! { ! free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); ! return false; ! } } /* ! * If we have most-common-values info, look for a large MCV. This is * needed even if we also have a histogram, since the histogram excludes * the MCVs. However, usually the MCVs will not be the extreme values, so * avoid unnecessary data copying. --- 3979,4002 ---- { if (nvalues > 0) { + tmin = datumCopy(values[0], typByVal, typLen); tmax = datumCopy(values[nvalues - 1], typByVal, typLen); ! have_data = true; } free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } ! else if (get_attstatsslot(vardata->statsTuple, ! vardata->atttype, vardata->atttypmod, ! STATISTIC_KIND_HISTOGRAM, InvalidOid, ! &values, &nvalues, ! NULL, NULL)) { ! free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); ! return false; } /* ! * If we have most-common-values info, look for extreme MCVs. This is * needed even if we also have a histogram, since the histogram excludes * the MCVs. However, usually the MCVs will not be the extreme values, so * avoid unnecessary data copying. *************** *** 3869,3899 **** &values, &nvalues, NULL, NULL)) { ! bool large_mcv = false; FmgrInfo opproc; fmgr_info(get_opcode(sortop), &opproc); for (i = 0; i < nvalues; i++) { ! if (!have_max) { ! tmax = values[i]; ! large_mcv = have_max = true; } ! else if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i]))) { tmax = values[i]; ! large_mcv = true; } } ! if (large_mcv) tmax = datumCopy(tmax, typByVal, typLen); free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } *max = tmax; ! return have_max; } --- 4007,4047 ---- &values, &nvalues, NULL, NULL)) { ! bool tmin_is_mcv = false; ! bool tmax_is_mcv = false; FmgrInfo opproc; fmgr_info(get_opcode(sortop), &opproc); for (i = 0; i < nvalues; i++) { ! if (!have_data) { ! tmin = tmax = values[i]; ! tmin_is_mcv = tmax_is_mcv = have_data = true; ! continue; ! } ! if (DatumGetBool(FunctionCall2(&opproc, values[i], tmin))) ! { ! tmin = values[i]; ! tmin_is_mcv = true; } ! if (DatumGetBool(FunctionCall2(&opproc, tmax, values[i]))) { tmax = values[i]; ! tmax_is_mcv = true; } } ! if (tmin_is_mcv) ! tmin = datumCopy(tmin, typByVal, typLen); ! if (tmax_is_mcv) tmax = datumCopy(tmax, typByVal, typLen); free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } + *min = tmin; *max = tmax; ! return have_data; } Index: src/include/nodes/relation.h =================================================================== RCS file: /cvsroot/pgsql/src/include/nodes/relation.h,v retrieving revision 1.150 diff -c -r1.150 relation.h *** src/include/nodes/relation.h 15 Nov 2007 22:25:17 -0000 1.150 --- src/include/nodes/relation.h 6 Dec 2007 23:46:32 -0000 *************** *** 993,1000 **** int strategy; /* sort direction (ASC or DESC) */ bool nulls_first; /* do NULLs come before normal values? */ /* Results */ ! Selectivity leftscansel; /* scan fraction for clause left side */ ! Selectivity rightscansel; /* scan fraction for clause right side */ } MergeScanSelCache; /* --- 993,1002 ---- int strategy; /* sort direction (ASC or DESC) */ bool nulls_first; /* do NULLs come before normal values? */ /* Results */ ! Selectivity leftstartsel; /* first-join fraction for clause left side */ ! Selectivity leftendsel; /* last-join fraction for clause left side */ ! Selectivity rightstartsel; /* first-join fraction for clause right side */ ! Selectivity rightendsel; /* last-join fraction for clause right side */ } MergeScanSelCache; /* Index: src/include/utils/selfuncs.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/selfuncs.h,v retrieving revision 1.41 diff -c -r1.41 selfuncs.h *** src/include/utils/selfuncs.h 7 Nov 2007 22:37:24 -0000 1.41 --- src/include/utils/selfuncs.h 6 Dec 2007 23:46:32 -0000 *************** *** 161,168 **** extern void mergejoinscansel(PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, ! Selectivity *leftscan, ! Selectivity *rightscan); extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows); --- 161,168 ---- extern void mergejoinscansel(PlannerInfo *root, Node *clause, Oid opfamily, int strategy, bool nulls_first, ! Selectivity *leftstart, Selectivity *leftend, ! Selectivity *rightstart, Selectivity *rightend); extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows);