diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 2b63827..1acf118 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -98,6 +98,12 @@ static int compare_rows(const void *a, const void *b);
 static int acquire_inherited_sample_rows(Relation onerel, int elevel,
                               HeapTuple *rows, int targrows,
                               double *totalrows, double *totaldeadrows);
+static int acquire_sample_index(Relation onerel, Relation index, int elevel,
+                              HeapTuple *rows, int targrows,
+                              double *totalrows, double *totaldeadrows);
+static double correlation(int values_cnt, double corr_xysum);
+static double idx_correlation(Oid indexoid, VacAttrStatsP stats, int targrows,
+                              int num_mcvs, Datum *mcv_values);
 static void update_attstats(Oid relid, bool inh,
                 int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -206,6 +211,7 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
      * used to do this in get_rel_oids() but seems safer to check after we've
      * locked the relation.
      */
+    // TODO: if (onerel->rd_rel->relkind == RELKIND_INDEX) => needs a separate sample_rows function?
     if (onerel->rd_rel->relkind == RELKIND_RELATION ||
         onerel->rd_rel->relkind == RELKIND_MATVIEW)
     {
@@ -433,37 +439,42 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
         {
             AnlIndexData *thisdata = &indexdata[ind];
             IndexInfo  *indexInfo;
+            ListCell   *indexpr_item;
 
             thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
             thisdata->tupleFract = 1.0; /* fix later if partial */
-            if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
+            if (va_cols != NIL)
             {
-                ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
+                continue;
+            }
 
-                thisdata->vacattrstats = (VacAttrStats **)
+            indexpr_item = list_head(indexInfo->ii_Expressions);
+            thisdata->vacattrstats = (VacAttrStats **)
                     palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
-                tcnt = 0;
-                for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
-                {
-                    int         keycol = indexInfo->ii_KeyAttrNumbers[i];
+            tcnt = 0;
+            for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
+            {
+                int         keycol = indexInfo->ii_KeyAttrNumbers[i];
+                Node       *indexkey = NULL;
 
-                    if (keycol == 0)
-                    {
-                        /* Found an index expression */
-                        Node       *indexkey;
-
-                        if (indexpr_item == NULL)   /* shouldn't happen */
-                            elog(ERROR, "too few entries in indexprs list");
-                        indexkey = (Node *) lfirst(indexpr_item);
-                        indexpr_item = lnext(indexpr_item);
-                        thisdata->vacattrstats[tcnt] =
-                            examine_attribute(Irel[ind], i + 1, indexkey);
-                        if (thisdata->vacattrstats[tcnt] != NULL)
-                            tcnt++;
-                    }
+                if (keycol == 0)
+                {
+                    /* Found an index expression */
+                    if (indexpr_item == NULL)   /* shouldn't happen */
+                        elog(ERROR, "too few entries in indexprs list");
+                    indexkey = (Node *) lfirst(indexpr_item);
+                    indexpr_item = lnext(indexpr_item);
                 }
-                thisdata->attr_cnt = tcnt;
+
+                /* Examine plain index columns as well as expressions */
+                thisdata->vacattrstats[tcnt] =
+                    examine_attribute(Irel[ind], i + 1, indexkey);
+
+                if (thisdata->vacattrstats[tcnt] != NULL)
+                    tcnt++;
             }
+
+            thisdata->attr_cnt = tcnt;
         }
     }
@@ -548,11 +558,38 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
             MemoryContextResetAndDeleteChildren(col_context);
         }
 
+#if 0
+    for (ind = 0; ind < nindexes; ind++)
+    {
+        int         indnumrows;
+        double      indtotalrows,
+                    indtotaldeadrows;
+        HeapTuple  *indrows = palloc(targrows * sizeof(HeapTuple));
+
+        /*
+         * Attempted to do an index scan for each index, in order to get
+         * correlation stats of the HEAP; I was thinking (probably
+         * incorrectly) that the high random cost was due to index
+         * seeks, and that it was needed to traverse the index in logical
+         * rather than physical order to determine its correlation.  But the
+         * planner already assumes that index reads are random.
+         */
+        indnumrows = acquire_sample_index(onerel, Irel[ind], elevel,
+                                          indrows, targrows,
+                                          &indtotalrows, &indtotaldeadrows);
+        compute_index_stats(Irel[ind], indnumrows,
+                            indexdata + ind, 1, /* XXX: should update c_i_s to do one index only? */
+                            indrows, indnumrows,
+                            col_context);
+    }
+#else
     if (hasindex)
-        compute_index_stats(onerel, totalrows,
-                            indexdata, nindexes,
-                            rows, numrows,
-                            col_context);
+        compute_index_stats(onerel, numrows,
+                            indexdata, nindexes,
+                            rows, numrows,
+                            col_context);
+#endif
 
     MemoryContextSwitchTo(old_context);
     MemoryContextDelete(col_context);
@@ -721,10 +758,6 @@ compute_index_stats(Relation onerel, double totalrows,
                     rowno;
         double      totalindexrows;
 
-        /* Ignore index if no columns to analyze and not partial */
-        if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
-            continue;
-
         /*
          * Need an EState for evaluation of index expressions and
          * partial-index predicates.  Create it in the per-index context to be
@@ -767,6 +800,7 @@ compute_index_stats(Relation onerel, double totalrows,
                 if (!ExecQual(predicate, econtext))
                     continue;
             }
+
             numindexrows++;
 
             if (attr_cnt > 0)
@@ -790,6 +824,7 @@ compute_index_stats(Relation onerel, double totalrows,
                     VacAttrStats *stats = thisdata->vacattrstats[i];
                     int         attnum = stats->attr->attnum;
 
+                    stats->rows = rows;
                     if (isnull[attnum - 1])
                     {
                         exprvals[tcnt] = (Datum) 0;
@@ -815,7 +850,7 @@ compute_index_stats(Relation onerel, double totalrows,
         totalindexrows = ceil(thisdata->tupleFract * totalrows);
 
         /*
-         * Now we can compute the statistics for the expression columns.
+         * Now we can compute the statistics.
         */
         if (numindexrows > 0)
         {
@@ -830,11 +865,15 @@ compute_index_stats(Relation onerel, double totalrows,
                 stats->exprvals = exprvals + i;
                 stats->exprnulls = exprnulls + i;
                 stats->rowstride = attr_cnt;
+// Use a low stats target for non-expression index columns, to avoid keeping
+// a duplicate MCV list and/or histogram for them?
+//              if (indexInfo->ii_KeyAttrNumbers[i] != 0) // && stats->attr->attstattarget != 0)
+//                  stats->attr->attstattarget = 0;
+
                 (*stats->compute_stats) (stats, ind_fetch_func,
                                          numindexrows,
                                          totalindexrows);
-
                 /*
                  * If the n_distinct option is specified, it overrides the
                  * above computation.  For indices, we always use just
@@ -896,7 +936,7 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
     /*
      * When analyzing an expression index, believe the expression tree's type
      * not the column datatype --- the latter might be the opckeytype storage
-     * type of the opclass, which is not interesting for our purposes.  (Note:
+     * type of the opclass, which is not interesting for our purposes.  XXX (Note:
      * if we did anything with non-expression index columns, we'd need to
      * figure out where to get the correct type info from, but for now that's
      * not a problem.)  It's not clear whether anyone will care about the
@@ -1478,6 +1518,305 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 }
 
 
+// This doesn't work: broad/coarse correlation doesn't represent well the
+// non-sequential heap page access which may happen at a small scale..
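+// (Presumably, e.g.: a heap that is globally ordered but shuffled within
+// small zones samples as nearly monotonic, so the coarse correlation is ~1,
+// yet an index scan still visits the pages within each zone out of order.)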
+static int
+acquire_sample_index(Relation onerel, Relation index, int elevel,
+                     HeapTuple *rows, int targrows,
+                     double *totalrows, double *totaldeadrows)
+{
+    int         numrows = 0;    /* # rows now in reservoir */
+    double      samplerows = 0; /* total # rows collected */
+    double      rowstoskip = -1;    /* -1 means not set yet */
+
+    HeapTuple   targtuple;
+    // Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot());
+    // Snapshot snapshot = GetLatestSnapshot();
+    Snapshot    snapshot = GetActiveSnapshot();
+    // Snapshot snapshot = GetOldestSnapshot();
+
+    /* Prepare for sampling rows */
+    ReservoirStateData rstate;
+    IndexScanDesc index_scan;
+
+    reservoir_init_selection_state(&rstate, targrows);
+
+    // ScanKeyData scankeys[1];
+    // ScanKeyEntryInitialize(scankeys, SK_ISNULL|SK_SEARCHNOTNULL, 1, InvalidStrategy, InvalidOid, InvalidOid, InvalidOid, (Datum) 0);
+    // XXX search not null ??  SK_SEARCHNOTNULL
+    index_scan = index_beginscan(onerel, index, snapshot, 0, 0);
+    // index_rescan(index_scan, scankeys, 0, NULL, 0);
+    index_rescan(index_scan, NULL, 0, NULL, 0);
+
+    // XXX: it's pretty unfortunate to do a full index scan for each table
+    // being analyzed ... esp. since a design goal is to avoid very
+    // inefficient random read patterns during index scans ... consider
+    // alternatives like index scanning for each MCV?  However, I suspect
+    // that would tend to strongly underestimate correlation in some cases
+    // and too strongly discourage index scans..
+    // Probably go back to a block sample of leaf pages, counting the
+    // linearity of the referenced heap blocks within an index page, but
+    // not from one index page to another???
+
+    // Alternatives tried here:
+    // while ((targtuple = index_getnext(index_scan, ForwardScanDirection)) != NULL)
+    // for (unsigned long int c = 0; (targtuple = index_getnext(index_scan, BackwardScanDirection)) != NULL; ++c)
+    // ItemPointer tid;
+    // while ((tid = index_getnext_tid(index_scan, ForwardScanDirection)) != NULL)
+    for (unsigned long int c = 0; (targtuple = index_getnext(index_scan, ForwardScanDirection)) != NULL; ++c) // Is there a more useful value to use here ???
+    {
+        HeapTuple   h = heap_copytuple(targtuple);
+
+        // ItemPointerSet(&(h->t_self), c>>16, c&0xffff );
+        // index_fetch_heap(index_scan);
+
+        ItemPointerSet(&(h->t_self), c>>16, c&0xffff ); // no good, the correlation of heap value WRT index location is 100% ...
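+        // (presumably because the sample is then sorted by these synthesized
+        // TIDs, i.e. back into index order, and for a btree the value order
+        // matches the index order, so the two rankings always agree)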
+        // h->t_self.ip_blkid.bi_lo=c&0xffff; // >>8; // ItemPointerSet(&(h->t_self), c>>16, c&0xffff );
+
+        /* Vitter's algorithm, see above */
+        // ItemPointerSetBlockNumber(&h->t_self, c);
+        // ItemPointerSetOffsetNumber(&h->t_self, c);
+        // h->t_self.ip_blkid.bi_hi=c>>16;
+        // h->t_self.ip_blkid.bi_lo=c&OffsetNumberMask;
+        // h->t_self.ip_blkid.bi_hi+=c/30;
+
+        if (numrows < targrows)
+        {
+            rows[numrows++] = h;
+            samplerows += 1;
+            continue;
+        }
+
+        if (rowstoskip < 0)
+            rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
+
+        if (rowstoskip <= 0)
+        {
+            /* Found a suitable tuple: replace one of the reservoir at random */
+            int         k = (int) (targrows * sampler_random_fract(rstate.randstate));
+
+            Assert(k >= 0 && k < targrows);
+            heap_freetuple(rows[k]);
+            rows[k] = h;
+        }
+
+        rowstoskip -= 1;
+        samplerows += 1;
+    }
+
+    index_endscan(index_scan);
+
+    if (numrows == targrows)
+        qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
+
+    *totalrows = samplerows;
+    // totalblocks = RelationGetNumberOfBlocks(onerel);
+    // *totalrows = vac_estimate_reltuples(onerel, true, totalblocks, bs.m, liverows);
+    // if (bs.m > 0)
+    //  *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+    // else
+    //  *totaldeadrows = 0.0;
+
+    /* XXX: the page and live/dead row counts are placeholders for now */
+    ereport(elevel,
+            (errmsg("\"%s\": scanned %d of %u pages, "
+                    "containing %.0f live rows and %.0f dead rows; "
+                    "%d rows in sample, %.0f estimated total rows",
+                    RelationGetRelationName(index),
+                    -1, -1,
+                    -1.0, -1.0,
+                    numrows, *totalrows)));
+
+    return numrows;
+}
+
+/*
+ * Return the correlation coefficient given sum(x*y), where x is the list of
+ * item positions when sorted by one criterion, and y the positions of the
+ * same items when sorted by another (e.g. heap value vs. heap location).
+ */
+static double
+correlation(int values_cnt, double corr_xysum)
+{
+    /*----------
+     * Since we know the x and y value sets are both
+     *      0, 1, ..., values_cnt-1
+     * we have sum(x) = sum(y) =
+     *      (values_cnt-1)*values_cnt / 2
+     * and sum(x^2) = sum(y^2) =
+     *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+     *----------
+     * ... and the correlation coefficient reduces to
+     */
+    double      corr_xsum = ((double) (values_cnt - 1)) *
+    ((double) values_cnt) / 2.0;
+    double      corr_x2sum = ((double) (values_cnt - 1)) *
+    ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
+
+    /* XXX: fabs() discards the sign, treating anticorrelation the same as
+     * correlation; note this also affects the plain heap-column path. */
+    return (values_cnt * fabs(corr_xysum) - corr_xsum * corr_xsum) /
+        (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+}
+
+/*
+ * Compute the average of separately-computed correlation values, rather than
+ * the correlation of a single sample across the entire table, which
+ * overestimates correlation for large tables.  targrows is the number of
+ * TIDs to sample in each batch.
+ */
+static double
+idx_correlation(Oid indexoid, VacAttrStatsP stats, int targrows, int num_mcvs,
+                Datum *mcv_values)
+{
+    HeapTuple  *rows = stats->rows; /* reusing this allocation */
+    double      corr_sum = 0;
+    int         corr_samples = 0;
+    int         numrows = 0;
+    double      deadrows = 0;   /* XXX: never counted by a TID-only scan */
+    double      liverows = 0;
+    long int    batch;
+    double      samplerows = 0;
+
+    Snapshot    snapshot = GetActiveSnapshot();
+    Oid         heapoid = IndexGetRelation(indexoid, false);
+    Relation    heap = relation_open(heapoid, AccessShareLock);
+    Relation    index = RelationIdGetRelation(indexoid);
+    IndexScanDesc index_scan = index_beginscan(heap, index, snapshot, 1, 0);
+
+    // int strategy=BTGreaterEqualStrategyNumber;
+    // Oid opfam=get_opfamily_member(opfamily, stats->attrtypid, stats->attrtypid, BTGreaterEqualStrategyNumber);
+
+    /* For sampling: read the first TARGROWS TIDs for each value in the MCV list */
+    // XXX: consider only the first handful of MCVs?
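+    // (The point of the per-batch averaging, as I understand it: a table can
+    // have near-perfect whole-table correlation while the TIDs for any one
+    // key -- one batch here -- are badly scattered, e.g. after many
+    // interleaved inserts.)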
+    // XXX: poor correlation in an index can probably happen without MCVs,
+    // e.g. normally-distributed float values without repeated keys
+    // XXX: .. should we just read the first targrows TIDs returned by the index, or is there a better way ??
+
+    for (batch = 0; batch < num_mcvs || batch == 0; )
+    {
+        ScanKeyData scankeys;
+        ItemPointer tid = NULL;
+        double      corr_xysum = 0;
+
+        if (num_mcvs > 0)
+        {
+            ScanKeyInit(&scankeys, stats->attr->attnum, BTEqualStrategyNumber,
+                        get_opcode(((StdAnalyzeData *) stats->extra_data)->eqopr),
+                        mcv_values[batch]);
+        }
+        else
+        {
+            /* No MCVs: single iteration over first targrows tuples returned by index */
+            // XXX SK_SEARCHNOTNULL
+            ScanKeyEntryInitialize(&scankeys, SK_ISNULL | SK_SEARCHNOTNULL,
+                                   stats->attr->attnum, InvalidStrategy,
+                                   InvalidOid, InvalidOid, InvalidOid, (Datum) 0);
+        }
+
+        index_rescan(index_scan, &scankeys, 1, NULL, 0);
+        for ( ; numrows < targrows &&
+             (tid = index_getnext_tid(index_scan, ForwardScanDirection)) != NULL;
+             samplerows++)
+        {
+            rows[numrows]->t_self = *tid;
+            rows[numrows]->t_len = numrows; // abusing this field to record the index order
+            numrows++;
+            liverows++;
+        }
+
+        // avoid NaN if many dead tuples? if (!numrows) continue;
+
+        /* Retrieved consecutive TIDs; now compute their (fine-level) correlation */
+        qsort((void *) rows, numrows, sizeof(*rows), compare_rows);
+        for (int j = 0; j < numrows; j++)
+            corr_xysum += (double) j * rows[j]->t_len;
+
+        corr_samples++;
+        corr_sum += correlation(numrows, corr_xysum);
+
+        numrows = 0;
+        ++batch;
+        if (tid == NULL)
+            break;              /* ran out of index in fewer than targrows */
+    }
+
+    ereport(LOG,
+            (errmsg("\"%s(%s)\": scanned %ld batches with total %.0f TIDs, "
+                    "containing %.0f live and %.0f dead TIDs",
+                    RelationGetRelationName(index),
+                    NameStr(stats->attr->attname),
+                    batch, samplerows,
+                    liverows, deadrows)));
+
+    index_endscan(index_scan);
+    relation_close(index, NoLock);
+    relation_close(heap, NoLock);
+    return corr_sum / corr_samples;
+}
+
+#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
+#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
+
+/*
+ * Do a block-sampled walk along the entire index, in physical order, to
+ * determine the fraction of LEAF pages whose "next" pointers have a *lower*
+ * block number than their own (leaf fragmentation).  For a highly-correlated,
+ * freshly-built index this will be ~0; for a poorly correlated one(???), or
+ * one with many repeated keys, it will be between ~0.5 and 1, and an index
+ * scan across those duplicate keys will have a high random component.
+ * Logic bits stolen from pgstatindex.
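+ * (This is essentially pgstatindex()'s leaf_fragmentation metric, computed
+ * over a block sample instead of the whole index; note the caller below uses
+ * 1 minus this value.)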
+ */
+
+// XXX: these #includes belong at the head of the file
+#include "access/nbtree.h"
+#include "catalog/pg_am.h"
+
+static double
+idx_corr_fudge(Relation index, int targrows)
+{
+    BlockNumber totalblocks;
+    BlockSamplerData bs;
+
+    double      leaf_pages = 0;
+    double      fragments = 0;
+
+    // TransactionId OldestXmin;
+    // OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM);
+    // Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot());
+    // Snapshot snapshot = GetLatestSnapshot();
+    // Snapshot snapshot = GetActiveSnapshot();
+    // Snapshot snapshot = GetOldestSnapshot();
+    if (!IS_BTREE(index))
+    {
+        relation_close(index, AccessShareLock);
+        return 1;
+    }
+
+    totalblocks = RelationGetNumberOfBlocks(index);
+    // XXX: -1 excludes the last block; was the intent to skip the metapage (block 0)?
+    BlockSampler_Init(&bs, totalblocks - 1, targrows, random());
+
+    while (BlockSampler_HasMore(&bs))
+    {
+        BlockNumber blkno = BlockSampler_Next(&bs);
+        Buffer      buffer;
+        Page        page;
+        BTPageOpaque opaque;
+
+        vacuum_delay_point();
+        CHECK_FOR_INTERRUPTS();
+
+        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy); // bstrategy
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+        page = BufferGetPage(buffer);
+        opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+        if (P_ISDELETED(opaque) || P_IGNORE(opaque))
+        {
+            /* must not leak the buffer when skipping this page */
+            UnlockReleaseBuffer(buffer);
+            continue;
+        }
+
+        if (P_ISLEAF(opaque))
+        {
+            leaf_pages++;
+            if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno)
+                fragments++;
+        }
+
+        UnlockReleaseBuffer(buffer);
+    }
+
+    relation_close(index, AccessShareLock);
+
+    if (leaf_pages == 0)
+        return 0;               /* avoid NaN on an empty sample */
+    return fragments / leaf_pages;
+}
+
 /*
  *  update_attstats() -- update attribute statistics for one relation
  *
@@ -1658,6 +1997,7 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
     /* exprvals and exprnulls are already offset for proper column */
     i = rownum * stats->rowstride;
     *isNull = stats->exprnulls[i];
+
     return stats->exprvals[i];
 }
 
@@ -2258,7 +2598,7 @@ compute_scalar_stats(VacAttrStatsP stats,
                           stats->attrtype->typlen == -1);
     bool        is_varwidth = (!stats->attrtype->typbyval &&
                                stats->attrtype->typlen < 0);
-    double      corr_xysum;
+    double      corr_xysum = 0;
     SortSupportData ssup;
     ScalarItem *values;
     int         values_cnt = 0;
@@ -2352,10 +2692,18 @@ compute_scalar_stats(VacAttrStatsP stats,
                     dups_cnt;
         int         slot_idx = 0;
         CompareScalarsContext cxt;
+        float4     *corrs;
+        Datum      *mcv_values = NULL;  /* hoisted: also needed by idx_correlation() */
 
         /* Sort the collected values */
         cxt.ssup = &ssup;
         cxt.tupnoLink = tupnoLink;
+
+        /*
+         * The tuples were previously sorted by TID; now sort by heap value,
+         * as needed for the stats computations, keeping tupno (the original,
+         * physical order) around for the correlation computation.
+         */
         qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
                   compare_scalars, (void *) &cxt);
 
@@ -2378,7 +2726,6 @@ compute_scalar_stats(VacAttrStatsP stats,
      * is the last item of its group of duplicates (since the group will
      * be ordered by tupno).
      */
-    corr_xysum = 0;
     ndistinct = 0;
     nmultiple = 0;
     dups_cnt = 0;
@@ -2566,11 +2913,10 @@ compute_scalar_stats(VacAttrStatsP stats,
             }
         }
 
         /* Generate MCV slot entry */
         if (num_mcv > 0)
         {
             MemoryContext old_context;
-            Datum      *mcv_values;
             float4     *mcv_freqs;
 
             /* Must copy the target values into anl_context */
@@ -2713,37 +3059,42 @@ compute_scalar_stats(VacAttrStatsP stats,
             slot_idx++;
         }
 
-        /* Generate a correlation entry if there are multiple values */
+        /* Will generate a correlation entry if there are multiple values */
         if (values_cnt > 1)
         {
             MemoryContext old_context;
-            float4     *corrs;
-            double      corr_xsum,
-                        corr_x2sum;
 
-            /* Must copy the target values into anl_context */
             old_context = MemoryContextSwitchTo(stats->anl_context);
+            /* Must copy the target values into anl_context */
             corrs = (float4 *) palloc(sizeof(float4));
             MemoryContextSwitchTo(old_context);
+        }
 
-            /*----------
-             * Since we know the x and y value sets are both
-             *      0, 1, ..., values_cnt-1
-             * we have sum(x) = sum(y) =
-             *      (values_cnt-1)*values_cnt / 2
-             * and sum(x^2) = sum(y^2) =
-             *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
-             *----------
-             */
-            corr_xsum = ((double) (values_cnt - 1)) *
-                ((double) values_cnt) / 2.0;
-            corr_x2sum = ((double) (values_cnt - 1)) *
-                ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
-
-            /* And the correlation coefficient reduces to */
-            corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
-                (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+        if (values_cnt > 1 && fetchfunc == ind_fetch_func) /* && num_mcv > 0? */
+        {
+            /*
+             * Compute the alternate, fine-grained correlation, batched by
+             * the MCV list (repeated values) if there is one.
+             */
+            // elog(WARNING, "%s %s %d value0:%lu", __FILE__, __FUNCTION__, __LINE__, num_mcv ? mcv_values[0] : 1);
+            corrs[0] = idx_correlation(stats->attr->attrelid, stats, samplerows, num_mcv, mcv_values);
+            elog(WARNING, "%s %s %d corrs %lf", __FILE__, __FUNCTION__, __LINE__, corrs[0]);
+            stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+            stats->staop[slot_idx] = mystats->ltopr;
+            stats->stanumbers[slot_idx] = corrs;
+            stats->numnumbers[slot_idx] = 1;
+            slot_idx++;
+        }
+        else if (values_cnt > 1) // XXX && fetchfunc==ind_fetch_func? correlation is now strictly a per-index attribute and not a per-column one
+        {
+            double      fudge = 1;
+
+            if (fetchfunc == ind_fetch_func)
+            {
+                /*
+                 * XXX: currently unreachable, since the branch above takes
+                 * all the ind_fetch_func cases (unless it regains a num_mcv
+                 * condition).
+                 *
+                 * Compute a correlation fudge factor for indices with a high
+                 * number of duplicate values in an index column, which make
+                 * index scans highly random, because btree inserts at a
+                 * random position among equal keys to avoid O(N^2) insertion
+                 * behavior in that case.
+                 */
+                fudge = 1 - idx_corr_fudge(RelationIdGetRelation(stats->attr->attrelid), samplerows);
+            }
+            corrs[0] = correlation(values_cnt, corr_xysum);
+            // XXX: corrs[0] *= fudge;
             stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
             stats->staop[slot_idx] = mystats->ltopr;
             stats->stanumbers[slot_idx] = corrs;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index eb653cf..4529725 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -693,8 +693,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
     /*
      * Now interpolate based on estimated index order correlation to get total
      * disk I/O cost for main table accesses.
+     * Note the shape of this expression: as csquared approaches 0, the total
+     * approaches the max_IO_cost estimate; as it approaches 1, the total
+     * approaches min_IO_cost.
      */
     csquared = indexCorrelation * indexCorrelation;
 
+    elog(WARNING, "HERE 1222: csquared=%f minIO/R-P-C=%f maxIO/R-P-C=%f %s %s %d",
+         csquared, min_IO_cost / spc_random_page_cost, max_IO_cost / spc_random_page_cost,
+         __FILE__, __FUNCTION__, __LINE__);
+
     run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e103f5e..f097580 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6874,9 +6874,34 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
      */
     MemSet(&vardata, 0, sizeof(vardata));
 
-    if (index->indexkeys[0] != 0)
+    /* First, see if there are stats for the index itself */
+    relid = index->indexoid;
+    colnum = 1;
+
+    if (get_index_stats_hook &&
+        (*get_index_stats_hook) (root, relid, colnum, &vardata))
+    {
+        /*
+         * The hook took control of acquiring a stats tuple.  If it did
+         * supply a tuple, it'd better have supplied a freefunc.
+         */
+        elog(WARNING, "HERE 1223: indexCorrelation %s %s %d", __FILE__, __FUNCTION__, __LINE__);
+
+        if (HeapTupleIsValid(vardata.statsTuple) &&
+            !vardata.freefunc)
+            elog(ERROR, "no function provided to release variable stats with");
+    }
+    else if ((vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+                                                   ObjectIdGetDatum(relid),
+                                                   Int16GetDatum(colnum),
+                                                   BoolGetDatum(false))) != NULL)
+    {
+        elog(WARNING, "HERE 1224: indexCorrelation %s %s %d", __FILE__, __FUNCTION__, __LINE__);
+        vardata.freefunc = ReleaseSysCache;
+    }
+    else if (index->indexkeys[0] != 0)
     {
         /* Simple variable --- look to stats for the underlying table */
         RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
+
+        elog(WARNING, "HERE 1225: indexCorrelation %s %s %d", __FILE__, __FUNCTION__, __LINE__);
 
         Assert(rte->rtekind == RTE_RELATION);
@@ -6904,32 +6929,6 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
             vardata.freefunc = ReleaseSysCache;
         }
     }
-    else
-    {
-        /* Expression --- maybe there are stats for the index itself */
-        relid = index->indexoid;
-        colnum = 1;
-
-        if (get_index_stats_hook &&
-            (*get_index_stats_hook) (root, relid, colnum, &vardata))
-        {
-            /*
-             * The hook took control of acquiring a stats tuple.  If it did
-             * supply a tuple, it'd better have supplied a freefunc.
-             */
-            if (HeapTupleIsValid(vardata.statsTuple) &&
-                !vardata.freefunc)
-                elog(ERROR, "no function provided to release variable stats with");
-        }
-        else
-        {
-            vardata.statsTuple = SearchSysCache3(STATRELATTINH,
-                                                 ObjectIdGetDatum(relid),
-                                                 Int16GetDatum(colnum),
-                                                 BoolGetDatum(false));
-            vardata.freefunc = ReleaseSysCache;
-        }
-    }
 
     if (HeapTupleIsValid(vardata.statsTuple))
     {
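
A standalone sketch, not part of the patch (file and helper names invented
here), double-checking that the closed form used by correlation() above
agrees with a naive Pearson correlation over the rank sets {0..n-1}; note it
deliberately omits the patch's fabs():

    /* corr_check.c: cc corr_check.c -o corr_check && ./corr_check */
    #include <stdio.h>

    static double
    closed_form(int n, double xysum)
    {
        double  xsum = (double) (n - 1) * n / 2.0;
        double  x2sum = (double) (n - 1) * n * (2 * n - 1) / 6.0;

        return (n * xysum - xsum * xsum) / (n * x2sum - xsum * xsum);
    }

    int
    main(void)
    {
        /* y[i]: physical (heap) rank of the i'th value in index order */
        int     y[] = {0, 2, 1, 3, 5, 4, 6, 7};
        int     n = 8;
        double  xysum = 0.0;
        double  xbar = (n - 1) / 2.0;   /* mean of the ranks 0..n-1 */
        double  num = 0.0;
        double  den = 0.0;

        for (int i = 0; i < n; i++)
        {
            xysum += (double) i * y[i];
            num += (i - xbar) * (y[i] - xbar);
            den += (i - xbar) * (i - xbar); /* var(x) == var(y) for ranks */
        }

        /* both print ~0.952381 for this nearly-sequential permutation */
        printf("closed form: %f\n", closed_form(n, xysum));
        printf("naive:       %f\n", num / den);
        return 0;
    }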