diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index be4bbc7..46121a7 100644 *** a/doc/src/sgml/catalogs.sgml --- b/doc/src/sgml/catalogs.sgml *************** *** 8279,8286 **** A list of the most common values in the column. (Null if no values seem to be more common than any others.) - For some data types such as tsvector, this is a list of - the most common element values rather than values of the type itself. --- 8279,8284 ---- *************** *** 8289,8300 **** real[] ! A list of the frequencies of the most common values or elements, i.e., number of occurrences of each divided by total number of rows. (Null when most_common_vals is.) - For some data types such as tsvector, it can also store some - additional information, making it longer than the - most_common_vals array. --- 8287,8295 ---- real[] ! A list of the frequencies of the most common values, i.e., number of occurrences of each divided by total number of rows. (Null when most_common_vals is.) *************** *** 8326,8331 **** --- 8321,8358 ---- type does not have a < operator.) + + + most_common_elems + anyarray + + + A list of element values most often appearing within values of the + column. (Null for scalar types.) + + + + + most_common_elem_freqs + real[] + + + A list of the frequencies of the most common element values, i.e., the + fraction of rows containing at least one instance of the given element. Two or + four additional values follow those; they bear type-specific summary + information. + + + + + length_histogram_bounds + int[] + + + For arrays, a list of histogram bounds of the + number of distinct elements per array value. For other data types, it is null. 
+ + diff --git a/src/backend/catalog/index dc801ae..cdc4317 100644 *** a/src/backend/catalog/heap.c --- b/src/backend/catalog/heap.c *************** *** 45,50 **** --- 45,51 ---- #include "catalog/pg_namespace.h" #include "catalog/pg_statistic.h" #include "catalog/pg_tablespace.h" + #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "catalog/pg_type_fn.h" #include "catalog/storage.h" *************** *** 1182,1188 **** heap_create_with_catalog(const char *relname, F_ARRAY_SEND, /* array send (bin) proc */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! InvalidOid, /* analyze procedure - default */ new_type_oid, /* array element type - the rowtype */ true, /* yes, this is an array type */ InvalidOid, /* this has no array type */ --- 1183,1189 ---- F_ARRAY_SEND, /* array send (bin) proc */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! F_ARRAY_TYPANALYZE, /* special analyze procedure for arrays */ new_type_oid, /* array element type - the rowtype */ true, /* yes, this is an array type */ InvalidOid, /* this has no array type */ diff --git a/src/backend/catalog/index 50ba20c..3fea98f 100644 *** a/src/backend/catalog/system_views.sql --- b/src/backend/catalog/system_views.sql *************** *** 117,145 **** CREATE VIEW pg_stats AS stawidth AS avg_width, stadistinct AS n_distinct, CASE ! WHEN stakind1 IN (1, 4) THEN stavalues1 ! WHEN stakind2 IN (1, 4) THEN stavalues2 ! WHEN stakind3 IN (1, 4) THEN stavalues3 ! WHEN stakind4 IN (1, 4) THEN stavalues4 END AS most_common_vals, CASE ! WHEN stakind1 IN (1, 4) THEN stanumbers1 ! WHEN stakind2 IN (1, 4) THEN stanumbers2 ! WHEN stakind3 IN (1, 4) THEN stanumbers3 ! 
WHEN stakind4 IN (1, 4) THEN stanumbers4 END AS most_common_freqs, CASE WHEN stakind1 = 2 THEN stavalues1 WHEN stakind2 = 2 THEN stavalues2 WHEN stakind3 = 2 THEN stavalues3 WHEN stakind4 = 2 THEN stavalues4 END AS histogram_bounds, CASE WHEN stakind1 = 3 THEN stanumbers1[1] WHEN stakind2 = 3 THEN stanumbers2[1] WHEN stakind3 = 3 THEN stanumbers3[1] WHEN stakind4 = 3 THEN stanumbers4[1] ! END AS correlation FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid) JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum) LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) --- 117,170 ---- stawidth AS avg_width, stadistinct AS n_distinct, CASE ! WHEN stakind1 = 1 THEN stavalues1 ! WHEN stakind2 = 1 THEN stavalues2 ! WHEN stakind3 = 1 THEN stavalues3 ! WHEN stakind4 = 1 THEN stavalues4 ! WHEN stakind5 = 1 THEN stavalues5 END AS most_common_vals, CASE ! WHEN stakind1 = 1 THEN stanumbers1 ! WHEN stakind2 = 1 THEN stanumbers2 ! WHEN stakind3 = 1 THEN stanumbers3 ! WHEN stakind4 = 1 THEN stanumbers4 ! WHEN stakind5 = 1 THEN stanumbers5 END AS most_common_freqs, CASE WHEN stakind1 = 2 THEN stavalues1 WHEN stakind2 = 2 THEN stavalues2 WHEN stakind3 = 2 THEN stavalues3 WHEN stakind4 = 2 THEN stavalues4 + WHEN stakind5 = 2 THEN stavalues5 END AS histogram_bounds, CASE WHEN stakind1 = 3 THEN stanumbers1[1] WHEN stakind2 = 3 THEN stanumbers2[1] WHEN stakind3 = 3 THEN stanumbers3[1] WHEN stakind4 = 3 THEN stanumbers4[1] ! WHEN stakind5 = 3 THEN stanumbers5[1] ! END AS correlation, ! CASE ! WHEN stakind1 = 4 THEN stavalues1 ! WHEN stakind2 = 4 THEN stavalues2 ! WHEN stakind3 = 4 THEN stavalues3 ! WHEN stakind4 = 4 THEN stavalues4 ! WHEN stakind5 = 4 THEN stavalues5 ! END AS most_common_elems, ! CASE ! WHEN stakind1 = 4 THEN stanumbers1 ! WHEN stakind2 = 4 THEN stanumbers2 ! WHEN stakind3 = 4 THEN stanumbers3 ! WHEN stakind4 = 4 THEN stanumbers4 ! WHEN stakind5 = 4 THEN stanumbers5 ! END AS most_common_elem_freqs, ! CASE ! WHEN stakind1 = 5 THEN stavalues1 ! 
WHEN stakind2 = 5 THEN stavalues2 ! WHEN stakind3 = 5 THEN stavalues3 ! WHEN stakind4 = 5 THEN stavalues4 ! WHEN stakind5 = 5 THEN stavalues5 ! END AS length_histogram_bounds FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid) JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum) LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace) diff --git a/src/backend/commands/analyze.cindex b40e57b..bfe5683 100644 *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** *** 110,117 **** static void update_attstats(Oid relid, bool inh, static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); - static bool std_typanalyze(VacAttrStats *stats); - /* * analyze_rel() -- analyze one relation --- 110,115 ---- *************** *** 1794,1800 **** static int compare_mcvs(const void *a, const void *b); /* * std_typanalyze -- the default type-specific typanalyze function */ ! static bool std_typanalyze(VacAttrStats *stats) { Form_pg_attribute attr = stats->attr; --- 1792,1798 ---- /* * std_typanalyze -- the default type-specific typanalyze function */ ! bool std_typanalyze(VacAttrStats *stats) { Form_pg_attribute attr = stats->attr; diff --git a/src/backend/commands/typindex 0f8af31..49ea30f 100644 *** a/src/backend/commands/typecmds.c --- b/src/backend/commands/typecmds.c *************** *** 609,615 **** DefineType(List *names, List *parameters) F_ARRAY_SEND, /* send procedure */ typmodinOid, /* typmodin procedure */ typmodoutOid, /* typmodout procedure */ ! InvalidOid, /* analyze procedure - default */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ --- 609,615 ---- F_ARRAY_SEND, /* send procedure */ typmodinOid, /* typmodin procedure */ typmodoutOid, /* typmodout procedure */ ! 
F_ARRAY_TYPANALYZE, /* special analyze procedure for arrays */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ *************** *** 1140,1146 **** DefineEnum(CreateEnumStmt *stmt) F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! InvalidOid, /* analyze procedure - default */ enumTypeOid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ --- 1140,1146 ---- F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! F_ARRAY_TYPANALYZE, /* special analyze procedure for arrays */ enumTypeOid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ *************** *** 1450,1456 **** DefineRange(CreateRangeStmt *stmt) F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! InvalidOid, /* analyze procedure - default */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ --- 1450,1456 ---- F_ARRAY_SEND, /* send procedure */ InvalidOid, /* typmodin procedure - none */ InvalidOid, /* typmodout procedure - none */ ! F_ARRAY_TYPANALYZE, /* special analyze procedure for arrays */ typoid, /* element type ID */ true, /* yes this is an array type */ InvalidOid, /* no further array type */ diff --git a/src/backend/utils/adt/Makindex 5f968b0..0c13d75 100644 *** a/src/backend/utils/adt/Makefile --- b/src/backend/utils/adt/Makefile *************** *** 15,21 **** override CFLAGS+= -mieee endif endif ! OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \ cash.o char.o date.o datetime.o datum.o domains.o \ enum.o float.o format_type.o \ geo_ops.o geo_selfuncs.o int.o int8.o like.o lockfuncs.o \ --- 15,22 ---- endif endif ! 
OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o \
! 	array_selfuncs.o array_typanalyze.o bool.o \
  	cash.o char.o date.o datetime.o datum.o domains.o \
  	enum.o float.o format_type.o \
  	geo_ops.o geo_selfuncs.o int.o int8.o like.o lockfuncs.o \
diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c
new file mode 100644
index 0000000..886516b
*** /dev/null
--- b/src/backend/utils/adt/array_selfuncs.c
***************
*** 0 ****
--- 1,1016 ----
+ /*-------------------------------------------------------------------------
+  *
+  * array_selfuncs.c
+  *	  Functions for selectivity estimation of array operators.
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/utils/adt/array_selfuncs.c
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+
+ #include <math.h>
+
+ #include "access/hash.h"
+ #include "catalog/pg_am.h"
+ #include "catalog/pg_collation.h"
+ #include "catalog/pg_operator.h"
+ #include "commands/defrem.h"
+ #include "commands/vacuum.h"
+ #include "utils/array.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+ #include "utils/selfuncs.h"
+ #include "utils/typcache.h"
+
+ /* Default selectivity constant for "@>" and "<@" operators */
+ #define DEFAULT_CONTAIN_SEL 0.005
+
+ /* Default selectivity constant for "&&" operator */
+ #define DEFAULT_OVERLAP_SEL 0.01
+
+ /* Default selectivity for given operator */
+ #define DEFAULT_SEL(operator) \
+ 	((operator) == OID_ARRAY_OVERLAP_OP ? \
+ 		DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL)
+
+ /*
+  * Macro for selectivity estimation to be used if we have no statistics.
+  * Note that it picks up "typentry" from the scope where it is expanded.
+  */
+ #define array_selec_no_stats(array,nitems,op,cmpfunc) \
+ 	mcelem_array_selec(array, nitems, typentry, NULL, 0, NULL, 0, NULL, 0, op, cmpfunc)
+
+ static Selectivity calc_arraysel(VariableStatData *vardata, Datum constval,
+ 			  Oid operator);
+ static Selectivity mcelem_array_selec(ArrayType *array, int nitems,
+ 				   TypeCacheEntry *typentry, Datum *mcelem, int nmcelem,
+ 				   float4 *numbers, int nnumbers, Datum *hist, int nhist,
+ 				   Oid operator, FunctionCallInfo cmpfunc);
+ static int	element_compare(const void *key1, const void *key2, void *arg);
+ static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value,
+ 				 int *index, FunctionCallInfo cmpfunc);
+ static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem,
+ 	int nmcelem, float4 *numbers, Datum *array_data, int nitems, Oid operator,
+ 	FunctionCallInfo cmpfunc);
+ static float calc_hist(Datum *hist, int nhist, float *hist_part, int n);
+ static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
+ 				float4 *numbers, Datum *array_data, int nitems,
+ 				Datum *hist, int nhist, Oid operator,
+ 				FunctionCallInfo cmpfunc);
+ static float *calc_distr(float *p, int n, int m, float rest);
+
+ /*
+  * calc_scalararraysel -- selectivity for "const op ANY(column)" and
+  * "const op ALL(column)", where column is an array.
+  *
+  * orClause is true for the ANY form and false for the ALL form.  Only the
+  * element type's default "=" operator is estimated from statistics; any
+  * other operator gets a flat 0.5 estimate.
+  */
+ Selectivity
+ calc_scalararraysel(VariableStatData *vardata, Datum constval, bool orClause,
+ 					Oid operator)
+ {
+ 	Oid			elemtype;
+ 	Selectivity selec;
+ 	TypeCacheEntry *typentry;
+ 	Datum	   *hist;
+ 	int			nhist;
+ 	FunctionCallInfoData cmpfunc;
+
+ 	elemtype = get_base_element_type(vardata->vartype);
+
+
+ 	/* Get default comparison function */
+ 	typentry = lookup_type_cache(elemtype,
+ 		   TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO | TYPECACHE_EQ_OPR);
+
+ 	/* Handle only "=" operator. Return default selectivity in other cases. */
+ 	if (operator != typentry->eq_opr)
+ 		return (Selectivity) 0.5;
+
+ 	/* Without a comparison function, return default selectivity estimation */
+ 	if (!OidIsValid(typentry->cmp_proc))
+ 		return DEFAULT_CONTAIN_SEL;
+
+ 	InitFunctionCallInfoData(cmpfunc, &typentry->cmp_proc_finfo, 2,
+ 							 DEFAULT_COLLATION_OID, NULL, NULL);
+
+ 	if (HeapTupleIsValid(vardata->statsTuple))
+ 	{
+ 		Form_pg_statistic stats;
+ 		Datum	   *values;
+ 		int			nvalues;
+ 		float4	   *numbers;
+ 		int			nnumbers;
+
+ 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+ 		/* MCELEM will be an array of same type as element */
+ 		if (get_attstatsslot(vardata->statsTuple,
+ 							 elemtype, vardata->atttypmod,
+ 							 STATISTIC_KIND_MCELEM, InvalidOid,
+ 							 NULL,
+ 							 &values, &nvalues,
+ 							 &numbers, &nnumbers))
+ 		{
+ 			/* For const = ALL(column) get histogram of distinct element count */
+ 			if (orClause
+ 				|| !get_attstatsslot(vardata->statsTuple,
+ 									 INT4OID, -1,
+ 									 STATISTIC_KIND_LENGTH_HISTOGRAM, InvalidOid,
+ 									 NULL,
+ 									 &hist, &nhist,
+ 									 NULL, NULL))
+ 			{
+ 				hist = NULL;
+ 				nhist = 0;
+ 			}
+
+ 			/* Use the most-common-elements slot for the array Var. */
+ 			if (orClause)
+ 				selec = mcelem_array_contain_overlap_selec(values, nvalues,
+ 						numbers, &constval, 1, OID_ARRAY_CONTAIN_OP, &cmpfunc);
+ 			else
+ 				selec = mcelem_array_contained_selec(values, nvalues, numbers,
+ 						&constval, 1, hist, nhist,
+ 						OID_ARRAY_CONTAINED_OP, &cmpfunc);
+ 			if (hist)
+ 				free_attstatsslot(INT4OID, hist, nhist, NULL, 0);
+ 			free_attstatsslot(elemtype, values, nvalues, numbers, nnumbers);
+ 		}
+ 		else
+ 		{
+ 			/* No most-common-elements info, so do without */
+ 			if (orClause)
+ 				selec = mcelem_array_contain_overlap_selec(NULL, 0,
+ 						NULL, &constval, 1, OID_ARRAY_CONTAIN_OP, &cmpfunc);
+ 			else
+ 				selec = mcelem_array_contained_selec(NULL, 0, NULL, &constval,
+ 						1, NULL, 0, OID_ARRAY_CONTAINED_OP, &cmpfunc);
+ 		}
+
+ 		/*
+ 		 * MCE stats count only non-null rows, so adjust for null rows.
+ 		 */
+ 		selec *= (1.0 - stats->stanullfrac);
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * No stats at all, so do without.  NOTE(review): unlike the branches
+ 		 * above, this path uses the ANY-style estimate even when orClause is
+ 		 * false -- confirm that is intended.
+ 		 */
+ 		selec = mcelem_array_contain_overlap_selec(NULL, 0, NULL, &constval,
+ 									 1, OID_ARRAY_CONTAIN_OP, &cmpfunc);
+ 		/* we assume no nulls here, so no stanullfrac correction */
+ 	}
+
+ 	return selec;
+ }
+
+ /*
+  * arraysel -- restriction selectivity for "column @> const", "column && const"
+  * and "column <@ const"
+  */
+ Datum
+ arraysel(PG_FUNCTION_ARGS)
+ {
+ 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+
+ 	Oid			operator = PG_GETARG_OID(1);
+ 	List	   *args = (List *) PG_GETARG_POINTER(2);
+ 	int			varRelid = PG_GETARG_INT32(3);
+ 	VariableStatData vardata;
+ 	Node	   *other;
+ 	bool		varonleft;
+ 	Selectivity selec;
+ 	Oid			element_typeid;
+
+ 	/*
+ 	 * If expression is not (variable op pseudoconstant) or (pseudoconstant op
+ 	 * variable), then punt and return a default estimate.
+ 	 */
+ 	if (!get_restriction_variable(root, args, varRelid,
+ 								  &vardata, &other, &varonleft))
+ 		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+
+ 	/*
+ 	 * Can't do anything useful if the other argument is not a constant, either.
+ 	 */
+ 	if (!IsA(other, Const))
+ 	{
+ 		ReleaseVariableStats(vardata);
+ 		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ 	}
+
+ 	/*
+ 	 * The "&&", "@>" and "<@" operators are strict, so we can cope with NULL
+ 	 * right away.
+ 	 */
+ 	if (((Const *) other)->constisnull)
+ 	{
+ 		ReleaseVariableStats(vardata);
+ 		PG_RETURN_FLOAT8(0.0);
+ 	}
+
+ 	/*
+ 	 * If the Var is on the right, commute the containment operators so that
+ 	 * the rest of the code can always think in terms of "column op const".
+ 	 * ("&&" is symmetric and needs no change.)
+ 	 */
+ 	if (!varonleft)
+ 	{
+ 		if (operator == OID_ARRAY_CONTAIN_OP)
+ 			operator = OID_ARRAY_CONTAINED_OP;
+ 		else if (operator == OID_ARRAY_CONTAINED_OP)
+ 			operator = OID_ARRAY_CONTAIN_OP;
+ 	}
+
+ 	/*
+ 	 * OK, there's a Var and a Const we're dealing with here. We need the
+ 	 * Const to be an array with same element type as column, else we can't do
+ 	 * anything useful.
+ 	 */
+ 	element_typeid = get_base_element_type(((Const *) other)->consttype);
+ 	if (element_typeid != InvalidOid &&
+ 		element_typeid == get_base_element_type(vardata.vartype))
+ 	{
+ 		selec = calc_arraysel(&vardata, ((Const *) other)->constvalue,
+ 							  operator);
+ 	}
+ 	else
+ 	{
+ 		/* Const is not an array of the column's element type, so punt */
+ 		selec = DEFAULT_SEL(operator);
+ 	}
+
+ 	ReleaseVariableStats(vardata);
+
+ 	CLAMP_PROBABILITY(selec);
+
+ 	PG_RETURN_FLOAT8((float8) selec);
+ }
+
+ /*
+  * Calculate selectivity for "column @> const", "column && const" and
+  * "column <@ const" based on the statistics.
+  */
+ static Selectivity
+ calc_arraysel(VariableStatData *vardata, Datum constval, Oid operator)
+ {
+ 	Selectivity selec;
+ 	ArrayType  *array;
+ 	int			ndims;
+ 	int		   *dims;
+ 	int			nitems;
+ 	TypeCacheEntry *typentry;
+ 	FunctionCallInfoData cmpfunc;
+
+ 	/*
+ 	 * The caller made sure the const is an array with same element type, so
+ 	 * get it now
+ 	 */
+ 	array = DatumGetArrayTypeP(constval);
+ 	ndims = ARR_NDIM(array);
+ 	dims = ARR_DIMS(array);
+ 	nitems = ArrayGetNItems(ndims, dims);
+
+ 	/* Get default comparison function */
+ 	typentry = lookup_type_cache(array->elemtype,
+ 								 TYPECACHE_CMP_PROC | TYPECACHE_CMP_PROC_FINFO);
+
+ 	if (!OidIsValid(typentry->cmp_proc))
+ 		return DEFAULT_SEL(operator);
+
+ 	InitFunctionCallInfoData(cmpfunc, &typentry->cmp_proc_finfo, 2,
+ 							 DEFAULT_COLLATION_OID, NULL, NULL);
+
+ 	if (HeapTupleIsValid(vardata->statsTuple))
+ 	{
+ 		Form_pg_statistic stats;
+ 		Datum	   *values;
+ 		int			nvalues;
+ 		Datum	   *hist;
+ 		int			nhist;
+ 		float4	   *numbers;
+ 		int			nnumbers;
+
+ 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+ 		/* MCELEM will be an array of same type as column */
+ 		if (get_attstatsslot(vardata->statsTuple,
+ 							 array->elemtype, vardata->atttypmod,
+ 							 STATISTIC_KIND_MCELEM, InvalidOid,
+ 							 NULL,
+ 							 &values, &nvalues,
+ 							 &numbers, &nnumbers))
+ 		{
+ 			/*
+ 			 * For "array <@ const" case we also need histogram of distinct
+ 			 * element counts.
+ 			 */
+ 			if (operator != OID_ARRAY_CONTAINED_OP
+ 				|| !get_attstatsslot(vardata->statsTuple,
+ 									 INT4OID, -1,
+ 									 STATISTIC_KIND_LENGTH_HISTOGRAM,
+ 									 InvalidOid,
+ 									 NULL,
+ 									 &hist, &nhist,
+ 									 NULL, NULL))
+ 			{
+ 				hist = NULL;
+ 				nhist = 0;
+ 			}
+
+ 			/* Use the most-common-elements slot for the array Var. */
+ 			selec = mcelem_array_selec(array, nitems, typentry, values, nvalues,
+ 							numbers, nnumbers, hist, nhist, operator, &cmpfunc);
+ 			if (hist)
+ 				free_attstatsslot(INT4OID, hist, nhist, NULL, 0);
+ 			free_attstatsslot(array->elemtype, values, nvalues, numbers,
+ 							  nnumbers);
+ 		}
+ 		else
+ 		{
+ 			/* No most-common-elements info, so do without */
+ 			selec = array_selec_no_stats(array, nitems, operator, &cmpfunc);
+ 		}
+
+ 		/*
+ 		 * MCE stats count only non-null rows, so adjust for null rows.
+ 		 */
+ 		selec *= (1.0 - stats->stanullfrac);
+ 	}
+ 	else
+ 	{
+ 		/* No stats at all, so do without */
+ 		selec = array_selec_no_stats(array, nitems, operator, &cmpfunc);
+ 		/* we assume no nulls here, so no stanullfrac correction */
+ 	}
+
+ 	return selec;
+ }
+
+ /*
+  * find_next_mcelem binary-searches a most common elements array, starting
+  * from *index, for the first member >= value.  It saves the position of the
+  * match into *index and returns true if it's an exact match.
+  */
+ static bool
+ find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index,
+ 				 FunctionCallInfo cmpfunc)
+ {
+ 	int			l = *index,
+ 				r = nmcelem - 1,
+ 				i,
+ 				res;
+
+ 	while (l <= r)
+ 	{
+ 		i = (l + r) / 2;
+ 		res = element_compare(&mcelem[i], &value, cmpfunc);
+ 		if (res == 0)
+ 		{
+ 			*index = i;
+ 			return true;
+ 		}
+ 		else if (res < 0)
+ 			l = i + 1;
+ 		else
+ 			r = i - 1;
+ 	}
+ 	*index = l;
+ 	return false;
+ }
+
+ /*
+  * Array selectivity estimation based on most common elements statistics.
+  *
+  * Extracts and sorts the non-null elements of the constant array, then
+  * dispatches on the operator to the matching estimator.
+  */
+ static Selectivity
+ mcelem_array_selec(ArrayType *array, int nitems, TypeCacheEntry *typentry,
+ 	   Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *hist,
+ 				   int nhist, Oid operator, FunctionCallInfo cmpfunc)
+ {
+ 	int			i;
+ 	char	   *ptr;
+ 	bits8	   *bitmap;
+ 	int			bitmask;
+ 	Datum	   *array_data;
+ 	bool		null_present;
+ 	int			nonnull_nitems;
+
+ 	/*
+ 	 * There should be four more Numbers than Values, because the last four
+ 	 * cells are taken for nulls, minimal frequency, maximal frequency, and
+ 	 * average distinct element count.  Punt if not.
+ 	 */
+ 	if (nnumbers != nmcelem + 4)
+ 		mcelem = NULL;
+
+ 	if (!mcelem)
+ 		nmcelem = 0;
+
+ 	/*
+ 	 * Prepare constant array data for sorting.  Sorting lets us find unique
+ 	 * elements and efficiently merge with the MCELEM array.
+ 	 */
+ 	array_data = (Datum *) palloc(sizeof(Datum) * nitems);
+ 	bitmap = ARR_NULLBITMAP(array);
+ 	ptr = ARR_DATA_PTR(array);
+ 	bitmask = 1;
+ 	nonnull_nitems = 0;
+ 	null_present = false;
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		if (bitmap && (*bitmap & bitmask) == 0)
+ 			null_present = true;
+ 		else
+ 		{
+ 			/* Extract array data */
+ 			array_data[nonnull_nitems] = fetch_att(ptr, typentry->typbyval,
+ 												   typentry->typlen);
+ 			ptr = att_addlength_pointer(ptr, typentry->typlen, ptr);
+ 			ptr = (char *) att_align_nominal(ptr, typentry->typalign);
+ 			nonnull_nitems++;
+ 		}
+ 		/* Adjust bitmask and bitmap pointer */
+ 		bitmask <<= 1;
+ 		if (bitmask == 0x100)
+ 		{
+ 			if (bitmap)
+ 				bitmap++;
+ 			bitmask = 1;
+ 		}
+ 	}
+
+ 	/* Query "column @> '{smth., null}'" matches nothing. */
+ 	if (null_present && operator == OID_ARRAY_CONTAIN_OP)
+ 		return 0.0;
+
+ 	/* Sort extracted elements using their default comparison function. */
+ 	qsort_arg(array_data, nonnull_nitems, sizeof(Datum), element_compare, cmpfunc);
+
+ 	/* "column @> const" and "column && const" cases */
+ 	if (operator == OID_ARRAY_CONTAIN_OP || operator == OID_ARRAY_OVERLAP_OP)
+ 		return mcelem_array_contain_overlap_selec(mcelem, nmcelem, numbers,
+ 							  array_data, nonnull_nitems, operator, cmpfunc);
+
+ 	/* "column <@ const" case */
+ 	if (operator == OID_ARRAY_CONTAINED_OP)
+ 		return mcelem_array_contained_selec(mcelem, nmcelem, numbers,
+ 				 array_data, nonnull_nitems, hist, nhist, operator, cmpfunc);
+
+ 	elog(ERROR, "arraysel call for invalid operator (oid = %u)", operator);
+ 	return 0.0;					/* keep compiler quiet */
+ }
+
+ /* Fast function for floor value of 2 based logarithm calculation. */
+ static int
+ floor_log2(uint32 n)
+ {
+ 	int			pos = 0;
+
+ 	if (n == 0)
+ 		return -1;
+ 	if (n >= 1 << 16)
+ 	{
+ 		n >>= 16;
+ 		pos += 16;
+ 	}
+ 	if (n >= 1 << 8)
+ 	{
+ 		n >>= 8;
+ 		pos += 8;
+ 	}
+ 	if (n >= 1 << 4)
+ 	{
+ 		n >>= 4;
+ 		pos += 4;
+ 	}
+ 	if (n >= 1 << 2)
+ 	{
+ 		n >>= 2;
+ 		pos += 2;
+ 	}
+ 	if (n >= 1 << 1)
+ 	{
+ 		pos += 1;
+ 	}
+ 	return pos;
+ }
+
+ /*
+  * Estimate selectivity of "column @> const" and "column && const" based on
+  * most common element statistics.  This estimation assumes element
+  * occurrences are independent.
+  *
+  * TODO: this estimation probably could be improved by using the distinct
+  * element count histogram.  For example, excepting the special case of
+  * "column @> '{}'", we can multiply the calculated selectivity by the
+  * fraction of nonempty arrays in the column.
+  */
+ static Selectivity
+ mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
+ 								   float4 *numbers, Datum *array_data, int nitems,
+ 								   Oid operator, FunctionCallInfo cmpfunc)
+ {
+ 	Selectivity selec,
+ 				elem_selec;
+ 	int			mcelem_index,
+ 				i;
+ 	bool		use_bsearch;
+ 	float4		minfreq;
+
+ 	if (mcelem)
+ 	{
+ 		/*
+ 		 * Grab the lowest frequency.  compute_array_stats() stored it as the
+ 		 * second trailing number.
+ 		 */
+ 		minfreq = numbers[nmcelem + 1];
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Without statistics set minfreq so that minfreq / 2 =
+ 		 * DEFAULT_CONTAIN_SEL
+ 		 */
+ 		minfreq = 2 * DEFAULT_CONTAIN_SEL;
+ 	}
+
+ 	/* Decide whether it is faster to use binary search or not. */
+ 	if (nitems * floor_log2((unsigned int) nmcelem) < nmcelem + nitems)
+ 		use_bsearch = true;
+ 	else
+ 		use_bsearch = false;
+
+ 	if (operator == OID_ARRAY_CONTAIN_OP)
+ 	{
+ 		/*
+ 		 * Initial selectivity for "column @> const" query is 1.0, and it will
+ 		 * be decreased with each element of constant array.
+ 		 */
+ 		selec = 1.0;
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Initial selectivity for "column && const" query is 0.0, and it will
+ 		 * be increased with each element of constant array.
+ 		 */
+ 		selec = 0.0;
+ 	}
+ 	mcelem_index = 0;
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		bool		found = false;
+
+ 		/* Compare with previous value and skip duplicates. */
+ 		if (i > 0 &&
+ 			!element_compare(&array_data[i - 1], &array_data[i], cmpfunc))
+ 			continue;
+
+ 		/* Find the smallest MCELEM >= this. */
+ 		if (use_bsearch)
+ 		{
+ 			found = find_next_mcelem(mcelem, nmcelem, array_data[i],
+ 									 &mcelem_index, cmpfunc);
+ 		}
+ 		else
+ 		{
+ 			while (mcelem_index < nmcelem)
+ 			{
+ 				int			cmp = element_compare(&mcelem[mcelem_index],
+ 												  &array_data[i], cmpfunc);
+
+ 				if (cmp < 0)
+ 					mcelem_index++;
+ 				else
+ 				{
+ 					/* mcelem is found */
+ 					if (cmp == 0)
+ 						found = true;
+ 					break;
+ 				}
+ 			}
+ 		}
+
+ 		if (found)
+ 		{
+ 			/* MCELEM is found; use its frequency. */
+ 			elem_selec = numbers[mcelem_index];
+ 			mcelem_index++;
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * The element is not in MCELEM.  Punt, but assume that the
+ 			 * selectivity cannot be more than minfreq / 2.
+ 			 */
+ 			elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
+ 		}
+
+ 		/*
+ 		 * Adjust overall selectivity using the current element's selectivity
+ 		 * and an assumption of element occurrence independence.
+ 		 */
+ 		if (operator == OID_ARRAY_CONTAIN_OP)
+ 			selec *= elem_selec;
+ 		else
+ 			selec = selec + elem_selec - selec * elem_selec;
+ 	}
+
+ 	/* Clamp intermediate results to stay sane despite roundoff error */
+ 	CLAMP_PROBABILITY(selec);
+
+ 	return selec;
+ }
+
+ /*
+  * Calculate the first n distinct element count probabilities from a
+  * histogram with nhist >= 2 bounds.  A histogram box with bounds a and b
+  * gives 1 / ((b - a) * (nhist - 1)) probability to each value in (a,b) and
+  * half of that to a and b themselves.  Fills hist_part[0..n] and returns
+  * the probability that the distinct element count is <= n.
+  */
+ static float
+ calc_hist(Datum *hist, int nhist, float *hist_part, int n)
+ {
+ 	int			k,
+ 				i = 0,
+ 				prev_interval = 0,
+ 				next_interval = 0;
+ 	float		frac,
+ 				total = 0.0f;
+
+ 	/*
+ 	 * frac is a probability contribution by each interval between histogram
+ 	 * values.  We have nhist - 1 intervals.  Contribution of one will be 1 /
+ 	 * (nhist - 1).
+ 	 */
+ 	frac = 1.0f / ((float) (nhist - 1));
+ 	for (k = 0; k <= n; k++)
+ 	{
+ 		int			count = 0;
+
+ 		/* Count the histogram boundaries precisely equal to k. */
+ 		while (i < nhist && DatumGetInt32(hist[i]) <= k)
+ 		{
+ 			if (DatumGetInt32(hist[i]) == k)
+ 				count++;
+ 			i++;
+ 		}
+
+ 		if (count > 0)
+ 		{
+ 			/* k is an exact bound for at least one histogram box. */
+ 			float		val;
+
+ 			/* Length of the box from k (= hist[i - 1]) up to the next bound */
+ 			if (i < nhist)
+ 				next_interval = DatumGetInt32(hist[i]) -
+ 					DatumGetInt32(hist[i - 1]);
+ 			else
+ 				next_interval = 0;
+
+ 			/*
+ 			 * count - 1 histogram boxes contain k exclusively.  They
+ 			 * contribute a total of (count - 1) * frac probability.  Also
+ 			 * factor in the partial histogram boxes on either side.
+ 			 */
+ 			val = (float) (count - 1);
+ 			if (next_interval > 0)
+ 				val += 0.5f / ((float) next_interval);
+ 			if (prev_interval > 0)
+ 				val += 0.5f / ((float) prev_interval);
+ 			hist_part[k] = frac * val;
+ 			prev_interval = next_interval;
+ 		}
+ 		else
+ 		{
+ 			/* k does not appear as an exact histogram bound. */
+ 			if (prev_interval == 0)
+ 				hist_part[k] = 0.0f;
+ 			else
+ 				hist_part[k] = frac / ((float) prev_interval);
+ 		}
+ 		/* Accumulate total probability. */
+ 		total += hist_part[k];
+ 	}
+ 	return total;
+ }
+
+ /*
+  * Consider n independent events with probabilities p.  This function
+  * calculates probabilities of exact k of events occurrence for k in [0;m].
+  * Imagine matrix M of (n + 1) x (m + 1) size.  Element M[i,j] denotes the
+  * probability that exactly j of first i events occur.  Obviously M[0,0] = 1.
+  * For any constant j, each increment of i increases the probability iff the
+  * event occurs.  So, by the law of total probability: M[i,j] = M[i - 1, j] *
+  * (1 - p[i]) + M[i - 1, j - 1] * p[i] for i > 0, j > 0.  M[i,0] = M[i - 1, 0]
+  * * (1 - p[i]) for i > 0.  "rest" is the sum of the probabilities of all
+  * low-probability events not included in p.
+  *
+  * The result is an array of m + 1 floats.  It may point into the middle of
+  * a palloc'd chunk, so callers must not pfree() it.
+  */
+ static float *
+ calc_distr(float *p, int n, int m, float rest)
+ {
+ 	float	   *row,
+ 			   *prev_row,
+ 			   *tmp;
+ 	int			i,
+ 				j;
+
+ 	/*
+ 	 * Since we return only the last row of the matrix and need only the
+ 	 * current and previous row for calculations, allocate two rows.  They
+ 	 * must start out zeroed: M[i,j] = 0 whenever j > i, and those cells are
+ 	 * never written by the loop below.
+ 	 */
+ 	row = (float *) palloc0(2 * (m + 1) * sizeof(float));
+ 	prev_row = row + (m + 1);
+
+ 	/* M[0,0] = 1 */
+ 	row[0] = 1.0f;
+ 	for (i = 1; i <= n; i++)
+ 	{
+ 		float		t = p[i - 1];
+
+ 		/* Swap rows */
+ 		tmp = row;
+ 		row = prev_row;
+ 		prev_row = tmp;
+ 		/* Calculate next row */
+ 		for (j = 0; j <= i && j <= m; j++)
+ 		{
+ 			float		val = 0.0f;
+
+ 			if (j < i)
+ 				val += prev_row[j] * (1.0f - t);
+ 			if (j > 0)
+ 				val += prev_row[j - 1] * t;
+ 			row[j] = val;
+
+ 		}
+ 	}
+
+ 	/* Take care about events with low probabilities. */
+ 	if (rest > 0.0f)
+ 	{
+ 		/*
+ 		 * The probability of no occurrence of events contributing to the
+ 		 * "rest" probability has a limit of exp(-rest) when the number of
+ 		 * events is high.  Another simplification is to replace those events
+ 		 * with one event having (1 - exp(-rest)) probability.
+ 		 *
+ 		 * NOTE(review): row[] is updated in place in ascending order, so
+ 		 * row[i + 1] receives the already-augmented row[i].  The net effect
+ 		 * is a convolution with the geometric distribution (1 - p) * p^k,
+ 		 * i.e. the substitute event may "occur" more than once -- confirm
+ 		 * whether that is intended.
+ 		 */
+ 		rest = 1.0f - exp(-rest);
+ 		for (i = 0; i <= m; i++)
+ 		{
+ 			if (i < m)
+ 				row[i + 1] += row[i] * rest;
+ 			row[i] *= (1.0f - rest);
+ 		}
+ 	}
+ 	return row;
+ }
+
+ /*
+  * Estimate selectivity of "column <@ const" based on most common element
+  * statistics.  Independent element occurrence would imply a particular
+  * distribution of distinct element counts among matching rows.  Real data
+  * usually falsifies that assumption.  For example, in a set of 1-element
+  * integer arrays having elements in the range [0;10], element occurrences are
+  * not independent.  If they were, a sufficiently-large set would include all
+  * distinct element counts 0 through 11.  We correct for this using the
+  * histogram of distinct element counts.
+  *
+  * In the "column @> const" and "column && const" cases, we usually have
+  * "const" with low summary frequency of elements (otherwise we have
+  * selectivity close to 0 or 1 correspondingly).  That's why the effect of
+  * dependence related to distinct element counts distribution is negligible
+  * there.  In the "column <@ const" case, summary frequency of elements is
+  * high (otherwise we have selectivity close to 0).  That's why we should do
+  * correction due to array distinct element counts distribution.
+  */
+ static Selectivity
+ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
+ 							 float4 *numbers, Datum *array_data, int nitems,
+ 							 Datum *hist, int nhist, Oid operator,
+ 							 FunctionCallInfo cmpfunc)
+ {
+ 	int			mcelem_index,
+ 				i,
+ 				unique_nitems = 0;
+ 	float		selec,
+ 				minfreq,
+ 				default_freq,
+ 				nullelem_freq;
+ 	float	   *dist,
+ 			   *mcelem_dist,
+ 			   *hist_part;
+ 	float		avg_count,
+ 				mult,
+ 				rest;
+ 	float	   *elem_selec;
+
+ 	/*
+ 	 * elem_selec is array of estimated frequencies for elements in the
+ 	 * constant.
+ 	 */
+ 	elem_selec = (float *) palloc(sizeof(float) * nitems);
+
+ 	if (mcelem)
+ 	{
+ 		/*
+ 		 * Grab some of the summary statistics that compute_array_stats()
+ 		 * stores: frequency of the null elements, lowest frequency, and
+ 		 * average distinct element count.
+ 		 */
+ 		nullelem_freq = numbers[nmcelem];
+ 		minfreq = numbers[nmcelem + 1];
+ 		avg_count = numbers[nmcelem + 3];
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Without statistics set minfreq so that minfreq / 2 =
+ 		 * DEFAULT_CONTAIN_SEL
+ 		 */
+ 		nullelem_freq = 0.0f;
+ 		minfreq = 2 * DEFAULT_CONTAIN_SEL;
+ 		avg_count = 10.0f;
+ 	}
+
+ 	/*
+ 	 * "rest" will be the sum of the frequencies of all elements not
+ 	 * represented in MCELEM.  The average distinct element count is the sum
+ 	 * of the frequencies of *all* elements.  Begin with that; we will proceed
+ 	 * to subtract the MCELEM frequencies.
+ 	 */
+ 	rest = avg_count;
+
+ 	default_freq = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
+
+ 	mcelem_index = 0;
+
+ 	/*
+ 	 * mult is the multiplier that presents estimate of probability that each
+ 	 * mcelem which is not present in constant doesn't occur.
+ 	 */
+ 	mult = 1.0f;
+
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		bool		found = false;
+
+ 		/* Compare with previous value and skip duplicates. */
+ 		if (i > 0 &&
+ 			!element_compare(&array_data[i - 1], &array_data[i], cmpfunc))
+ 			continue;
+ 		unique_nitems++;
+
+ 		/*
+ 		 * Iterate over MCELEM until we find an entry greater than or equal to
+ 		 * this element of the constant.  Simultaneously update "rest" and
+ 		 * "mult".  If we find an exact match, update elem_selec.
+ 		 */
+ 		while (mcelem_index < nmcelem)
+ 		{
+ 			int			cmp = element_compare(&mcelem[mcelem_index], &array_data[i],
+ 											  cmpfunc);
+
+ 			if (cmp < 0)
+ 			{
+ 				mult *= (1.0f - numbers[mcelem_index]);
+ 				rest -= numbers[mcelem_index];
+ 				mcelem_index++;
+ 			}
+ 			else
+ 			{
+ 				if (cmp == 0)
+ 				{
+ 					elem_selec[unique_nitems - 1] = numbers[mcelem_index];
+ 					rest -= numbers[mcelem_index];
+ 					found = true;
+ 				}
+ 				break;
+ 			}
+ 		}
+
+ 		if (found)
+ 		{
+ 			mcelem_index++;
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * The element is not in MCELEM.  Punt, but assume that the
+ 			 * selectivity cannot be more than minfreq / 2.
+ 			 */
+ 			elem_selec[unique_nitems - 1] = default_freq;
+ 		}
+ 	}
+
+ 	/*
+ 	 * If we handled all constant elements without exhausting the MCELEM
+ 	 * array, finish walking it to complete "rest" and "mult".
+ 	 */
+ 	while (mcelem_index < nmcelem)
+ 	{
+ 		mult *= (1.0f - numbers[mcelem_index]);
+ 		rest -= numbers[mcelem_index];
+ 		mcelem_index++;
+ 	}
+
+ 	/*
+ 	 * We should take care about elements which aren't in mcelem... somehow...
+ 	 */
+ 	mult *= exp(-rest);
+
+ 	/*
+ 	 * Using the distinct element count histogram requires O(nitems * (nmcelem
+ 	 * + nitems)) operations.  Beyond a certain computational cost threshold,
+ 	 * it's reasonable to sacrifice accuracy for decreased plan time.  The
+ 	 * test is written as a product so that it cannot divide by zero when
+ 	 * there are neither MCELEMs nor constant elements.  A histogram needs at
+ 	 * least two bounds to describe any box at all.
+ 	 */
+ 	if (nhist > 1 && unique_nitems * (nmcelem + unique_nitems) <=
+ 		300 * default_statistics_target)
+ 	{
+ 		/*
+ 		 * Calculate probabilities of each distinct element count for both
+ 		 * mcelems and constant elements.  At this point, assume independent
+ 		 * element occurrence.
+ 		 */
+ 		dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
+ 		mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
+
+ 		hist_part = (float *) palloc((unique_nitems + 1) * sizeof(float));
+ 		calc_hist(hist, nhist, hist_part, unique_nitems);
+
+ 		selec = 0.0f;
+
+ 		for (i = 0; i <= unique_nitems; i++)
+ 		{
+ 			/*
+ 			 * mult * dist[i] / mcelem_dist[i] gives us probability of qual
+ 			 * matching from assumption of independent element occurrence with
+ 			 * the condition that distinct element count = i.
+ 			 */
+ 			if (hist_part[i] > 0 && mcelem_dist[i] > 0)
+ 				selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
+ 		}
+ 	}
+ 	else
+ 	{
+ 		/* We don't have histogram.  Use a rough estimate. */
+ 		selec = mult;
+ 	}
+
+ 	/* Take into account occurrence of NULL element. */
+ 	selec *= (1.0f - nullelem_freq);
+
+ 	CLAMP_PROBABILITY(selec);
+
+ 	return selec;
+ }
+
+ /*
+  * Comparison function for elements.  Based on default comparison function for
+  * array element data type.
+  */
+ static int
+ element_compare(const void *key1, const void *key2, void *arg)
+ {
+ 	const Datum *d1 = (const Datum *) key1;
+ 	const Datum *d2 = (const Datum *) key2;
+ 	FunctionCallInfo cmpf = (FunctionCallInfo) arg;
+
+ 	cmpf->arg[0] = *d1;
+ 	cmpf->arg[1] = *d2;
+ 	cmpf->argnull[0] = false;
+ 	cmpf->argnull[1] = false;
+ 	cmpf->isnull = false;
+
+ 	return DatumGetInt32(FunctionCallInvoke(cmpf));
+ }
diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c
new file mode 100644
index 0000000..8b9a1a8
*** /dev/null
--- b/src/backend/utils/adt/array_typanalyze.c
***************
*** 0 ****
--- 1,759 ----
+ /*-------------------------------------------------------------------------
+  *
+  * array_typanalyze.c
+  *	  functions for gathering statistics from array columns
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/utils/adt/array_typanalyze.c
+  *
+  *-------------------------------------------------------------------------
+  */
+
+ #include "postgres.h"
+
+ #include
"access/hash.h" + #include "access/tuptoaster.h" + #include "catalog/pg_am.h" + #include "catalog/pg_collation.h" + #include "catalog/pg_operator.h" + #include "commands/vacuum.h" + #include "commands/defrem.h" + #include "parser/parse_oper.h" + #include "utils/array.h" + #include "utils/builtins.h" + #include "utils/datum.h" + #include "utils/hsearch.h" + #include "utils/lsyscache.h" + #include "utils/selfuncs.h" + #include "utils/typcache.h" + + /* + * To avoid consuming too much memory, IO and CPU load during analysis, we + * ignore arrays that are wider than WIDTH_THRESHOLD (after detoasting!). + */ + #define WIDTH_THRESHOLD 0x10000 + + /* Extra data for compute_array_stats function */ + typedef struct + { + /* Information about element type */ + Oid type_id; + Oid eq_opr; + bool typbyval; + int16 typlen; + char typalign; + FunctionCallInfoData cmp, + eq, + hash; + FmgrInfo hash_func_info; + + /* std_typanalyze() state */ + void *std_extra_data; + void (*std_compute_stats) (VacAttrStatsP stats, + AnalyzeAttrFetchFunc fetchfunc, + int samplerows, + double totalrows); + } ArrayAnalyzeExtraData; + + static ArrayAnalyzeExtraData *extra_data; + + /* A hash table entry for the Lossy Counting algorithm */ + typedef struct + { + Datum key; /* This is 'e' from the LC algorithm. */ + int last_container; /* Supports deduplication. */ + int frequency; /* This is 'f'. */ + int delta; /* And this is 'delta'. 
*/
+ } TrackItem;
+
+ /* An entry for the distinct element count hash table */
+ typedef struct
+ {
+ 	int			count;			/* hash key: distinct elements in one array */
+ 	int			frequency;		/* number of arrays having that count */
+ } DistinctElementCountItem;
+
+ static void compute_array_stats(VacAttrStats *stats,
+ 		   AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);
+ static void prune_element_hashtable(HTAB *elements_tab, int b_current);
+ static uint32 element_hash(const void *key, Size keysize);
+ static int	element_match(const void *key1, const void *key2, Size keysize);
+ static int	element_compare(const void *key1, const void *key2);
+ static int	trackitem_compare_frequencies_desc(const void *e1, const void *e2);
+ static int	trackitem_compare_element(const void *e1, const void *e2);
+ static int	countitem_compare_element(const void *e1, const void *e2);
+
+ /*
+  * array_typanalyze -- a custom typanalyze function for array columns
+  *
+  * Runs std_typanalyze() first and returns false if that fails.  If the
+  * element type has a default comparison function, an equality operator and
+  * a default hash opclass with a hash function, compute_array_stats() is
+  * installed as the column's compute_stats routine, chained on top of the
+  * standard one.  Otherwise the state left by std_typanalyze() is kept and
+  * true is returned.
+  */
+ Datum
+ array_typanalyze(PG_FUNCTION_ARGS)
+ {
+ 	VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+ 	TypeCacheEntry *typentry;
+ 	Oid			hash_opclass,
+ 				hash_opfamily,
+ 				element_typeid,
+ 				hash_proc;
+ 	ArrayAnalyzeExtraData *extra_data;	/* local; hides the file-level static */
+
+ 	/*
+ 	 * Call the standard typanalyze function.  It may fail to find needed
+ 	 * operators, in which case we also can't do anything.
+ 	 */
+ 	if (!std_typanalyze(stats))
+ 		PG_RETURN_BOOL(false);
+
+ 	/*
+ 	 * Gather information about the element type.  If we fail to find
+ 	 * something, leave the state from std_typanalyze() in place.
+ 	 */
+ 	element_typeid = stats->attrtype->typelem;
+
+ 	/* Oids are unsigned, so report the offending type with %u. */
+ 	if (!OidIsValid(element_typeid))
+ 		elog(ERROR, "array_typanalyze was invoked for non-array type %u",
+ 			 stats->attrtypid);
+
+ 	typentry = lookup_type_cache(element_typeid,
+ 								 TYPECACHE_EQ_OPR |
+ 								 TYPECACHE_CMP_PROC |
+ 								 TYPECACHE_EQ_OPR_FINFO |
+ 								 TYPECACHE_CMP_PROC_FINFO);
+
+ 	if (!OidIsValid(typentry->cmp_proc) || !OidIsValid(typentry->eq_opr))
+ 		PG_RETURN_BOOL(true);
+
+ 	hash_opclass = GetDefaultOpClass(element_typeid, HASH_AM_OID);
+ 	if (!OidIsValid(hash_opclass))
+ 		PG_RETURN_BOOL(true);
+
+ 	hash_opfamily = get_opclass_family(hash_opclass);
+ 	if (!OidIsValid(hash_opfamily))
+ 		PG_RETURN_BOOL(true);
+
+ 	hash_proc = get_opfamily_proc(hash_opfamily, element_typeid,
+ 								  element_typeid, HASHPROC);
+ 	if (!OidIsValid(hash_proc))
+ 		PG_RETURN_BOOL(true);
+
+ 	/* Store our findings for use by compute_array_stats() */
+ 	extra_data = (ArrayAnalyzeExtraData *) palloc(sizeof(ArrayAnalyzeExtraData));
+ 	fmgr_info(hash_proc, &extra_data->hash_func_info);
+ 	InitFunctionCallInfoData(extra_data->cmp, &typentry->cmp_proc_finfo,
+ 							 2, DEFAULT_COLLATION_OID, NULL, NULL);
+ 	InitFunctionCallInfoData(extra_data->eq, &typentry->eq_opr_finfo,
+ 							 2, DEFAULT_COLLATION_OID, NULL, NULL);
+ 	InitFunctionCallInfoData(extra_data->hash, &extra_data->hash_func_info,
+ 							 1, DEFAULT_COLLATION_OID, NULL, NULL);
+ 	extra_data->type_id = typentry->type_id;
+ 	extra_data->typbyval = typentry->typbyval;
+ 	extra_data->typlen = typentry->typlen;
+ 	extra_data->typalign = typentry->typalign;
+ 	extra_data->eq_opr = typentry->eq_opr;
+
+ 	/*
+ 	 * Save the extra_data and compute_stats that std_typanalyze() set up, so
+ 	 * that compute_array_stats() can still produce the scalar statistics.
+ 	 */
+ 	extra_data->std_extra_data = stats->extra_data;
+ 	extra_data->std_compute_stats = stats->compute_stats;
+
+ 	/* Now install our own compute function and its state. */
+ 	stats->compute_stats = compute_array_stats;
+ 	stats->extra_data = extra_data;
+
+ 	PG_RETURN_BOOL(true);
+ }
+
+ /*
+  * compute_array_stats() -- compute statistics for an array column
+  *
+  * This function computes statistics useful for determining selectivity for
+  * operators <@, &&, and @>.
+  *
+  * In addition to finding the most common values, as we do for most
+  * datatypes, find the most common array elements and compute a histogram of
+  * distinct element counts.  Exact duplicates of an entire array may be rare
+  * despite many arrays sharing individual elements.  This especially afflicts
+  * long arrays, which are also liable to lack all scalar statistics due to
+  * the analyze.c WIDTH_THRESHOLD.
+  *
+  * The algorithm used is Lossy Counting, as proposed in the paper "Approximate
+  * frequency counts over data streams" by G. S. Manku and R. Motwani, in
+  * Proceedings of the 28th International Conference on Very Large Data Bases,
+  * Hong Kong, China, August 2002, section 4.2.  The paper is available at
+  * http://www.vldb.org/conf/2002/S10P03.pdf
+  *
+  * The Lossy Counting (aka LC) algorithm goes like this:
+  * Let s be the threshold frequency for an item (the minimum frequency we
+  * are interested in) and epsilon the error margin for the frequency.  Let D
+  * be a set of triples (e, f, delta), where e is an element value, f is that
+  * element's frequency (actually, its current occurrence count) and delta is
+  * the maximum error in f.  We start with D empty and process the elements in
+  * batches of size w.  (The batch size is also known as "bucket size" and is
+  * equal to 1/epsilon.)  Let the current batch number be b_current, starting
+  * with 1.  For each element e we either increment its f count, if it's
+  * already in D, or insert a new triple into D with values (e, 1, b_current
+  * - 1).  After processing each batch we prune D, by removing from it all
+  * elements with f + delta <= b_current.
After the algorithm finishes we
+  * suppress all elements from D that do not satisfy f >= (s - epsilon) * N,
+  * where N is the total number of elements in the input.  We emit the
+  * remaining elements with estimated frequency f/N.  The LC paper proves
+  * that this algorithm finds all elements with true frequency at least s,
+  * and that no frequency is overestimated or is underestimated by more than
+  * epsilon.  Furthermore, given reasonable assumptions about the input
+  * distribution, the required table size is no more than about 7 times w.
+  *
+  * In the absence of a principled basis for other particular values, we
+  * follow ts_typanalyze() and use parameters s = 0.07/K, epsilon = s/10.  We
+  * merely leave out the correction for stopwords, which do not apply to
+  * arrays.  These parameters give bucket width w = K/0.007 and maximum
+  * expected hashtable size of about 1000 * K.
+  *
+  * Elements may repeat within an array.  Since duplicates do not change the
+  * behavior of <@, && or @>, take measures to count each element only once
+  * per array.  Therefore, we store in the finished pg_statistic entry each
+  * element's frequency as the fraction of all non-null rows that bear it.
+  * Divide the raw counts by nonnull_cnt to get those figures.
+  *
+  * stats->extra_data must be the ArrayAnalyzeExtraData built by
+  * array_typanalyze().  The compute_stats routine saved there is run first;
+  * the array statistics are then written into the first unused statistic
+  * slots, and are left out if no free slot remains.
+  */
+ static void
+ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+ 					int samplerows, double totalrows)
+ {
+ 	int			num_mcelem;
+
+ 	/*
+ 	 * Number of arrays actually examined.  NULL arrays and arrays wider than
+ 	 * WIDTH_THRESHOLD are skipped before this is incremented, so it already
+ 	 * is the count of analyzed non-null rows.
+ 	 */
+ 	int			analyzed_rows = 0;
+
+ 	/*
+ 	 * We should count not only null array values, but also null array
+ 	 * elements
+ 	 */
+ 	int			null_elem_cnt = 0;
+
+ 	/* This is D from the LC algorithm. */
+ 	HTAB	   *elements_tab;
+ 	HASHCTL		elem_hash_ctl;
+ 	HASH_SEQ_STATUS scan_status;
+
+ 	/* This is the current bucket number from the LC algorithm */
+ 	int			b_current;
+
+ 	/* This is 'w' from the LC algorithm */
+ 	int			bucket_width;
+ 	int			array_no;
+ 	uint64		element_no;
+ 	Datum		hash_key;
+ 	TrackItem  *item;
+
+ 	int			count_items_count;
+ 	int			count_item_index;
+ 	int			slot_idx = 0;
+ 	HTAB	   *count_tab;
+ 	HASHCTL		count_hash_ctl;
+ 	DistinctElementCountItem *count_item;
+ 	DistinctElementCountItem *sorted_count_items_tab;
+ 	MemoryContext old_context;
+
+ 	/*
+ 	 * Publish our state in the file-level variable used by the hash and
+ 	 * comparison callbacks, then let the standard routine compute the scalar
+ 	 * statistics with its own extra_data restored.
+ 	 */
+ 	extra_data = (ArrayAnalyzeExtraData *) stats->extra_data;
+ 	stats->extra_data = extra_data->std_extra_data;
+ 	old_context = CurrentMemoryContext;
+ 	extra_data->std_compute_stats(stats, fetchfunc, samplerows, totalrows);
+ 	MemoryContextSwitchTo(old_context);
+
+ 	/*
+ 	 * We want statistics_target * 10 elements in the MCELEM array.  This
+ 	 * multiplier is pretty arbitrary, but is meant to reflect the fact that
+ 	 * the number of individual elements tracked in pg_statistic ought to be
+ 	 * more than the number of values for a simple scalar column.
+ 	 */
+ 	num_mcelem = stats->attr->attstattarget * 10;
+
+ 	/*
+ 	 * We set bucket width equal to num_mcelem / 0.007 as per the comment
+ 	 * above.
+ 	 */
+ 	bucket_width = num_mcelem * 1000 / 7;
+
+ 	/*
+ 	 * Create the hashtable.  It will be in local memory, so we don't need to
+ 	 * worry about overflowing the initial size.  Also we don't need to pay any
+ 	 * attention to locking and memory management.
+ 	 */
+ 	MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl));
+ 	elem_hash_ctl.keysize = sizeof(Datum);
+ 	elem_hash_ctl.entrysize = sizeof(TrackItem);
+ 	elem_hash_ctl.hash = element_hash;
+ 	elem_hash_ctl.match = element_match;
+ 	elem_hash_ctl.hcxt = CurrentMemoryContext;
+ 	elements_tab = hash_create("Analyzed elements table",
+ 							   bucket_width * 7,
+ 							   &elem_hash_ctl,
+ 					HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+ 	/* hashtable for arrays distinct element count */
+ 	MemSet(&count_hash_ctl, 0, sizeof(count_hash_ctl));
+ 	count_hash_ctl.keysize = sizeof(int);
+ 	count_hash_ctl.entrysize = sizeof(DistinctElementCountItem);
+ 	count_hash_ctl.hash = tag_hash;
+ 	count_hash_ctl.hcxt = CurrentMemoryContext;
+ 	count_tab = hash_create("Array distinct element count table",
+ 							64,
+ 							&count_hash_ctl,
+ 							HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+
+ 	/* Initialize counters. */
+ 	b_current = 1;
+ 	element_no = 0;
+
+ 	/* Loop over the arrays. */
+ 	for (array_no = 0; array_no < samplerows; array_no++)
+ 	{
+ 		Datum		value;
+ 		bool		isnull;
+ 		bool		null_present;
+ 		ArrayType  *array;
+ 		char	   *ptr;
+ 		bits8	   *bitmap;
+ 		int			bitmask;
+ 		int			j;
+ 		int			ndims;
+ 		int		   *dims;
+ 		int			nitems;
+ 		uint64		prev_element_no = element_no;
+ 		int			distinct_count;
+ 		bool		count_item_found;
+
+ 		vacuum_delay_point();
+
+ 		/* NULL arrays contribute nothing to the element statistics. */
+ 		value = fetchfunc(stats, array_no, &isnull);
+ 		if (isnull)
+ 			continue;
+
+ 		/* Skip too-large values. */
+ 		if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
+ 			continue;
+
+ 		analyzed_rows++;
+
+ 		/*
+ 		 * Now detoast the array if needed.
+ 		 */
+ 		array = DatumGetArrayTypeP(value);
+ 		ptr = ARR_DATA_PTR(array);
+ 		bitmap = ARR_NULLBITMAP(array);
+ 		bitmask = 1;
+ 		ndims = ARR_NDIM(array);
+ 		dims = ARR_DIMS(array);
+ 		nitems = ArrayGetNItems(ndims, dims);
+
+ 		null_present = false;
+
+ 		/*
+ 		 * We loop through the elements in the array and add them to our
+ 		 * tracking hashtable.
+ 		 */
+ 		for (j = 0; j < nitems; j++)
+ 		{
+ 			bool		found;
+ 			bool		elem_isnull;
+
+ 			/* Get elements, checking for NULL */
+ 			if (bitmap && (*bitmap & bitmask) == 0)
+ 			{
+ 				hash_key = (Datum) 0;
+ 				elem_isnull = true;
+ 				null_present = true;
+ 			}
+ 			else
+ 			{
+ 				/* Must copy the target values into anl_context */
+ 				old_context = MemoryContextSwitchTo(stats->anl_context);
+
+ 				/* Get element value */
+ 				hash_key = datumCopy(fetch_att(ptr, extra_data->typbyval,
+ 											   extra_data->typlen),
+ 									 extra_data->typbyval,
+ 									 extra_data->typlen);
+ 				elem_isnull = false;
+ 				ptr = att_addlength_pointer(ptr, extra_data->typlen, ptr);
+ 				ptr = (char *) att_align_nominal(ptr, extra_data->typalign);
+
+ 				MemoryContextSwitchTo(old_context);
+ 			}
+
+ 			/* Advance bitmap pointers if any */
+ 			bitmask <<= 1;
+ 			if (bitmask == 0x100)
+ 			{
+ 				if (bitmap)
+ 					bitmap++;
+ 				bitmask = 1;
+ 			}
+
+ 			/* No null element processing other than flag setting here */
+ 			if (elem_isnull)
+ 				continue;
+
+ 			/* Lookup current element in hashtable, adding it if new */
+ 			item = (TrackItem *) hash_search(elements_tab,
+ 											 (const void *) &hash_key,
+ 											 HASH_ENTER, &found);
+
+ 			if (found)
+ 			{
+ 				/* The table keeps its own key; release our copy. */
+ 				if (!extra_data->typbyval)
+ 					pfree(DatumGetPointer(hash_key));
+
+ 				/*
+ 				 * The operators we assist ignore duplicate array elements.
+ 				 * Count a given distinct element once per array.  A repeat
+ 				 * within the same array leaves element_no unchanged, so it
+ 				 * must not reach the bucket-boundary test below either;
+ 				 * otherwise the same boundary could trigger several prunes
+ 				 * and b_current increments.
+ 				 */
+ 				if (item->last_container == array_no)
+ 					continue;
+
+ 				item->last_container = array_no;
+ 				item->frequency++;
+ 			}
+ 			else
+ 			{
+ 				/* Initialize new tracking list element */
+ 				item->last_container = array_no;
+ 				item->frequency = 1;
+ 				item->delta = b_current - 1;
+ 			}
+
+ 			/* element_no is N, the number of counted elements so far */
+ 			element_no++;
+
+ 			/* We prune the D structure after processing each bucket */
+ 			if (element_no % bucket_width == 0)
+ 			{
+ 				prune_element_hashtable(elements_tab, b_current);
+ 				b_current++;
+ 			}
+ 		}
+
+ 		/* Count null element presence once per array. */
+ 		if (null_present)
+ 			null_elem_cnt++;
+
+ 		/* Update frequency of the particular array distinct element count. */
+ 		distinct_count = element_no - prev_element_no;
+ 		count_item = (DistinctElementCountItem *)
+ 			hash_search(count_tab, &distinct_count,
+ 						HASH_ENTER, &count_item_found);
+
+ 		if (count_item_found)
+ 			count_item->frequency++;
+ 		else
+ 			count_item->frequency = 1;
+
+ 		/* Free memory allocated while detoasting. */
+ 		if (PointerGetDatum(array) != value)
+ 			pfree(array);
+ 	}
+
+ 	/* Skip slots occupied by standard statistics */
+ 	while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[slot_idx] != 0)
+ 		slot_idx++;
+
+ 	/* Fill histogram of distinct element counts. */
+ 	count_items_count = hash_get_num_entries(count_tab);
+ 	if (count_items_count > 0 && slot_idx < STATISTIC_NUM_SLOTS)
+ 	{
+ 		int			num_hist = stats->attr->attstattarget;
+ 		int			delta;
+ 		int64		frac;
+ 		int			i;
+ 		Datum	   *hist_values;
+
+ 		/* The fill loop below needs at least a first and a last entry. */
+ 		if (num_hist < 2)
+ 			num_hist = 2;
+
+ 		/*
+ 		 * Copy distinct elements count statistics from hashtab to array and
+ 		 * sort them.
+ 		 */
+ 		count_item_index = 0;
+ 		sorted_count_items_tab = (DistinctElementCountItem *)
+ 			palloc(sizeof(DistinctElementCountItem) * count_items_count);
+ 		hash_seq_init(&scan_status, count_tab);
+ 		while ((count_item =
+ 		  (DistinctElementCountItem *) hash_seq_search(&scan_status)) != NULL)
+ 		{
+ 			memcpy(&sorted_count_items_tab[count_item_index], count_item,
+ 				   sizeof(DistinctElementCountItem));
+ 			count_item_index++;
+ 		}
+ 		qsort(sorted_count_items_tab, count_items_count,
+ 			  sizeof(DistinctElementCountItem), countitem_compare_element);
+
+ 		/* Histogram should be stored in anl_context. */
+ 		hist_values = (Datum *) MemoryContextAlloc(stats->anl_context,
+ 												   sizeof(Datum) * num_hist);
+
+ 		/*
+ 		 * Fill histogram by hashtab.  Every analyzed array added one to
+ 		 * exactly one count item, so the frequencies sum to analyzed_rows.
+ 		 * "frac" is the running sum of frequency * (num_hist - 1) for the
+ 		 * items reached so far, minus delta = analyzed_rows - 1 for each
+ 		 * entry already emitted.  Advancing *before* each assignment keeps
+ 		 * the index inside the array: the amount consumed before entry i is
+ 		 * i * delta, which for i <= num_hist - 1 is less than the total of
+ 		 * analyzed_rows * (num_hist - 1).  The arithmetic is done in 64 bits
+ 		 * because frequency * (num_hist - 1) can exceed the int range.
+ 		 */
+ 		delta = analyzed_rows - 1;
+ 		count_item_index = 0;
+ 		frac = (int64) sorted_count_items_tab[0].frequency * (num_hist - 1);
+ 		for (i = 0; i < num_hist; i++)
+ 		{
+ 			while (frac <= 0)
+ 			{
+ 				count_item_index++;
+ 				frac += (int64) sorted_count_items_tab[count_item_index].frequency *
+ 					(num_hist - 1);
+ 			}
+ 			hist_values[i] =
+ 				Int32GetDatum(sorted_count_items_tab[count_item_index].count);
+ 			frac -= delta;
+ 		}
+ 		Assert(count_item_index == count_items_count - 1);
+
+ 		stats->stakind[slot_idx] = STATISTIC_KIND_LENGTH_HISTOGRAM;
+ 		stats->staop[slot_idx] = Int4LessOperator;
+ 		stats->stavalues[slot_idx] = hist_values;
+ 		stats->numvalues[slot_idx] = num_hist;
+ 		stats->statypid[slot_idx] = INT4OID;
+ 		stats->statyplen[slot_idx] = 4;
+ 		stats->statypbyval[slot_idx] = true;
+ 		stats->statypalign[slot_idx] = 'i';
+ 		slot_idx++;
+ 	}
+
+ 	/* We can only compute real stats if we found some non-null values. */
+ 	if (analyzed_rows > 0)
+ 	{
+ 		/* analyzed_rows never includes NULL arrays; see its declaration. */
+ 		int			nonnull_cnt = analyzed_rows;
+ 		int			i;
+ 		TrackItem **sort_table;
+ 		int			track_len;
+ 		int			cutoff_freq;
+ 		int			minfreq,
+ 					maxfreq;
+
+ 		/*
+ 		 * Construct an array of the interesting hashtable items, that is,
+ 		 * those meeting the cutoff frequency (s - epsilon)*N.  Also identify
+ 		 * the minimum and maximum frequencies among these items.
+ 		 *
+ 		 * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
+ 		 * frequency is 9*N / bucket_width.
+ 		 */
+ 		cutoff_freq = 9 * element_no / bucket_width;
+
+ 		i = hash_get_num_entries(elements_tab); /* surely enough space */
+ 		sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);
+
+ 		hash_seq_init(&scan_status, elements_tab);
+ 		track_len = 0;
+ 		minfreq = element_no;
+ 		maxfreq = 0;
+ 		while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
+ 		{
+ 			if (item->frequency > cutoff_freq)
+ 			{
+ 				sort_table[track_len++] = item;
+ 				minfreq = Min(minfreq, item->frequency);
+ 				maxfreq = Max(maxfreq, item->frequency);
+ 			}
+ 		}
+ 		Assert(track_len <= i);
+
+ 		/* emit some statistics for debug purposes */
+ 		elog(DEBUG3, "array: target # mces = %d, bucket width = %d, "
+ 			 "# elements = " UINT64_FORMAT ", hashtable size = %d, usable entries = %d",
+ 			 num_mcelem, bucket_width, element_no, i, track_len);
+
+ 		/*
+ 		 * If we obtained more elements than we really want, get rid of those
+ 		 * with least frequencies.  The easiest way is to qsort the array into
+ 		 * descending frequency order and truncate the array.
+ 		 */
+ 		if (num_mcelem < track_len)
+ 		{
+ 			qsort(sort_table, track_len, sizeof(TrackItem *),
+ 				  trackitem_compare_frequencies_desc);
+ 			/* reset minfreq to the smallest frequency we're keeping */
+ 			minfreq = sort_table[num_mcelem - 1]->frequency;
+ 		}
+ 		else
+ 			num_mcelem = track_len;
+
+ 		/* Generate MCELEM slot entry */
+ 		if (num_mcelem > 0 && slot_idx < STATISTIC_NUM_SLOTS)
+ 		{
+ 			MemoryContext old_context;
+ 			Datum	   *mcelem_values;
+ 			float4	   *mcelem_freqs;
+
+ 			/*
+ 			 * We want to store statistics sorted on the element value using
+ 			 * the element type's default comparison function.  This permits
+ 			 * fast binary searches in selectivity estimation functions.
+ 			 */
+ 			qsort(sort_table, num_mcelem, sizeof(TrackItem *),
+ 				  trackitem_compare_element);
+
+ 			/* Must copy the target values into anl_context */
+ 			old_context = MemoryContextSwitchTo(stats->anl_context);
+
+ 			/*
+ 			 * We sorted statistics on the element value, but we want to be
+ 			 * able to find the minimal and maximal frequencies without going
+ 			 * through all the values.  We also want the frequency of the null
+ 			 * element and the average distinct element count.  Store those
+ 			 * four values at the end of mcelem_freqs.
+ 			 */
+ 			mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
+ 			mcelem_freqs = (float4 *) palloc((num_mcelem + 4) * sizeof(float4));
+
+ 			/*
+ 			 * See comments above about use of nonnull_cnt as the divisor for
+ 			 * the final frequency estimates.
+ 			 */
+ 			for (i = 0; i < num_mcelem; i++)
+ 			{
+ 				TrackItem  *item = sort_table[i];
+
+ 				mcelem_values[i] = item->key;
+ 				mcelem_freqs[i] = (double) item->frequency /
+ 					(double) nonnull_cnt;
+ 			}
+ 			/* Trailer: null-element freq, min freq, max freq, avg count */
+ 			mcelem_freqs[i++] = (double) null_elem_cnt / (double) nonnull_cnt;
+ 			mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
+ 			mcelem_freqs[i++] = (double) maxfreq / (double) nonnull_cnt;
+ 			mcelem_freqs[i++] = (double) element_no / (double) nonnull_cnt;
+ 			MemoryContextSwitchTo(old_context);
+
+ 			stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM;
+ 			stats->staop[slot_idx] = extra_data->eq_opr;
+ 			stats->stanumbers[slot_idx] = mcelem_freqs;
+ 			/* See above comment about extra fields */
+ 			stats->numnumbers[slot_idx] = num_mcelem + 4;
+ 			stats->stavalues[slot_idx] = mcelem_values;
+ 			stats->numvalues[slot_idx] = num_mcelem;
+ 			/* We are storing values of element type */
+ 			stats->statypid[slot_idx] = extra_data->type_id;
+ 			stats->statyplen[slot_idx] = extra_data->typlen;
+ 			stats->statypbyval[slot_idx] = extra_data->typbyval;
+ 			stats->statypalign[slot_idx] = extra_data->typalign;
+ 		}
+ 	}
+
+ 	/*
+ 	 * We don't need to bother cleaning up any of our temporary palloc's.  The
+ 	 * hashtable should also go away, as it used a child memory context.
+ 	 */
+ }
+
+ /*
+  * A function to prune the D structure from the Lossy Counting algorithm.
+  * Consult compute_tsvector_stats() for wider explanation.
+  *
+  * Removes every entry with frequency + delta <= b_current and, for
+  * pass-by-reference element types, frees the removed entry's key value.
+  * Reads the file-level extra_data, so compute_array_stats() must have set it.
+  */
+ static void
+ prune_element_hashtable(HTAB *elements_tab, int b_current)
+ {
+ 	HASH_SEQ_STATUS scan_status;
+ 	TrackItem  *item;
+
+ 	hash_seq_init(&scan_status, elements_tab);
+ 	while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
+ 	{
+ 		if (item->frequency + item->delta <= b_current)
+ 		{
+ 			/* Fetch the key before the entry is removed from the table. */
+ 			Datum		value = item->key;
+
+ 			if (hash_search(elements_tab, (const void *) &item->key,
+ 							HASH_REMOVE, NULL) == NULL)
+ 				elog(ERROR, "hash table corrupted");
+ 			/* We should free memory if element is not passed by value */
+ 			if (!extra_data->typbyval)
+ 				pfree(DatumGetPointer(value));
+ 		}
+ 	}
+ }
+
+ /*
+  * Hash functions for elements.  Based on default hash opclass.
+  *
+  * key points to a Datum; keysize is not used.  The declared result is
+  * uint32, so the function's result is fetched with DatumGetUInt32.
+  */
+ static uint32
+ element_hash(const void *key, Size keysize)
+ {
+ 	const Datum *l = (const Datum *) key;
+
+ 	extra_data->hash.arg[0] = *l;
+ 	extra_data->hash.argnull[0] = false;
+ 	extra_data->hash.isnull = false;
+ 	return DatumGetUInt32(FunctionCallInvoke(&extra_data->hash));
+ }
+
+ /*
+  * Matching function for elements, to be used in hashtable lookups.
+  *
+  * Returns 0 when the two Datums are equal according to the equality
+  * function stored in extra_data, and nonzero otherwise.  That function's
+  * result is a boolean, hence DatumGetBool.
+  */
+ static int
+ element_match(const void *key1, const void *key2, Size keysize)
+ {
+ 	const Datum *d1 = (const Datum *) key1;
+ 	const Datum *d2 = (const Datum *) key2;
+
+ 	extra_data->eq.arg[0] = *d1;
+ 	extra_data->eq.arg[1] = *d2;
+ 	extra_data->eq.argnull[0] = false;
+ 	extra_data->eq.argnull[1] = false;
+ 	extra_data->eq.isnull = false;
+ 	return !DatumGetBool(FunctionCallInvoke(&extra_data->eq));
+ }
+
+ /*
+  * Comparison function for elements, based on default comparison function for
+  * element data type.
+  *
+  * XXX this may as well use SortSupport
+  */
+ static int
+ element_compare(const void *key1, const void *key2)
+ {
+ 	const Datum *d1 = (const Datum *) key1;
+ 	const Datum *d2 = (const Datum *) key2;
+
+ 	extra_data->cmp.arg[0] = *d1;
+ 	extra_data->cmp.arg[1] = *d2;
+ 	extra_data->cmp.argnull[0] = false;
+ 	extra_data->cmp.argnull[1] = false;
+ 	extra_data->cmp.isnull = false;
+ 	return DatumGetInt32(FunctionCallInvoke(&extra_data->cmp));
+ }
+
+ /*
+  * qsort() comparator for sorting TrackItems on frequencies (descending sort)
+  *
+  * The arguments point to TrackItem pointers, not to TrackItems.
+  */
+ static int
+ trackitem_compare_frequencies_desc(const void *e1, const void *e2)
+ {
+ 	const TrackItem *const * t1 = (const TrackItem *const *) e1;
+ 	const TrackItem *const * t2 = (const TrackItem *const *) e2;
+
+ 	return (*t2)->frequency - (*t1)->frequency;
+ }
+
+ /*
+  * qsort() comparator for sorting TrackItems on elements
+  *
+  * The arguments point to TrackItem pointers; ordering is delegated to
+  * element_compare() on the items' keys.
+  */
+ static int
+ trackitem_compare_element(const void *e1, const void *e2)
+ {
+ 	const TrackItem *const * t1 = (const TrackItem *const *) e1;
+ 	const TrackItem *const * t2 = (const TrackItem *const *) e2;
+
+ 	return element_compare(&(*t1)->key, &(*t2)->key);
+ }
+
+ /*
+  * qsort() comparator for sorting DistinctElementCountItem on elements
+  *
+  * Orders items by ascending "count".
+  */
+ static int
+ countitem_compare_element(const void *e1, const void *e2)
+ {
+ 	const DistinctElementCountItem *t1 = (const DistinctElementCountItem *) e1;
+ 	const DistinctElementCountItem *t2 = (const DistinctElementCountItem *) e2;
+
+ 	if (t1->count < t2->count)
+ 		return -1;
+ 	else if (t1->count == t2->count)
+ 		return 0;
+ 	else
+ 		return 1;
+ }
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index da638f8..e6ab0f1 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 1705,1710 **** scalararraysel(PlannerInfo *root,
--- 1705,1735 ----
  	RegProcedure oprsel;
  	FmgrInfo	oprselproc;
  	Selectivity s1;
+ 	bool		varonleft;
+ 	Node	   *other;
+ 	VariableStatData vardata;
+
+ 	/* Handle "const = qual(column)" case using array column statistics.
*/ + if (get_restriction_variable(root, clause->args, varRelid, + &vardata, &other, &varonleft)) + { + Oid elemtype = get_base_element_type(vardata.vartype); + + if (OidIsValid(elemtype) && IsA(other, Const)) + { + if (((Const *) other)->constisnull) + { + /* qual can't succeed if null array */ + ReleaseVariableStats(vardata); + return (Selectivity) 0.0; + } + s1 = calc_scalararraysel(&vardata, ((Const *) other)->constvalue, + useOr, operator); + ReleaseVariableStats(vardata); + return s1; + } + ReleaseVariableStats(vardata); + } /* * First, look up the underlying operator's selectivity estimator. Punt if diff --git a/src/include/catalog/pg_opeindex f19865d..ad88dc3 100644 *** a/src/include/catalog/pg_operator.h --- b/src/include/catalog/pg_operator.h *************** *** 130,135 **** DATA(insert OID = 96 ( "=" PGNSP PGUID b t t 23 23 16 96 518 int4eq eqsel e --- 130,136 ---- DESCR("equal"); DATA(insert OID = 97 ( "<" PGNSP PGUID b f f 23 23 16 521 525 int4lt scalarltsel scalarltjoinsel )); DESCR("less than"); + #define Int4LessOperator 97 DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel )); DESCR("equal"); #define TextEqualOperator 98 *************** *** 1513,1524 **** DATA(insert OID = 2590 ( "|&>" PGNSP PGUID b f f 718 718 16 0 0 circle_ove DESCR("overlaps or is above"); /* overlap/contains/contained for arrays */ ! DATA(insert OID = 2750 ( "&&" PGNSP PGUID b f f 2277 2277 16 2750 0 arrayoverlap areasel areajoinsel )); DESCR("overlaps"); ! DATA(insert OID = 2751 ( "@>" PGNSP PGUID b f f 2277 2277 16 2752 0 arraycontains contsel contjoinsel )); DESCR("contains"); ! 
DATA(insert OID = 2752 ( "<@" PGNSP PGUID b f f 2277 2277 16 2751 0 arraycontained contsel contjoinsel )); DESCR("is contained by"); /* capturing operators to preserve pre-8.3 behavior of text concatenation */ DATA(insert OID = 2779 ( "||" PGNSP PGUID b f f 25 2776 25 0 0 textanycat - - )); --- 1514,1528 ---- DESCR("overlaps or is above"); /* overlap/contains/contained for arrays */ ! DATA(insert OID = 2750 ( "&&" PGNSP PGUID b f f 2277 2277 16 2750 0 arrayoverlap arraysel areajoinsel )); DESCR("overlaps"); ! #define OID_ARRAY_OVERLAP_OP 2750 ! DATA(insert OID = 2751 ( "@>" PGNSP PGUID b f f 2277 2277 16 2752 0 arraycontains arraysel contjoinsel )); DESCR("contains"); ! #define OID_ARRAY_CONTAIN_OP 2751 ! DATA(insert OID = 2752 ( "<@" PGNSP PGUID b f f 2277 2277 16 2751 0 arraycontained arraysel contjoinsel )); DESCR("is contained by"); + #define OID_ARRAY_CONTAINED_OP 2752 /* capturing operators to preserve pre-8.3 behavior of text concatenation */ DATA(insert OID = 2779 ( "||" PGNSP PGUID b f f 25 2776 25 0 0 textanycat - - )); diff --git a/src/include/catalog/pg_procindex 355c61a..623e749 100644 *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** *** 865,870 **** DATA(insert OID = 2334 ( array_agg_finalfn PGNSP PGUID 12 1 0 0 0 f f f f f i --- 865,874 ---- DESCR("aggregate final function"); DATA(insert OID = 2335 ( array_agg PGNSP PGUID 12 1 0 0 0 t f f f f i 1 0 2277 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); DESCR("concatenate aggregate input into an array"); + DATA(insert OID = 3816 ( array_typanalyze PGNSP PGUID 12 1 0 0 0 f f f t f s 1 0 16 "2281" _null_ _null_ _null_ _null_ array_typanalyze _null_ _null_ _null_ )); + DESCR("array statistics collector"); + DATA(insert OID = 3817 ( arraysel PGNSP PGUID 12 1 0 0 0 f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ arraysel _null_ _null_ _null_ )); + DESCR("array selectivity estimation functions"); DATA(insert OID = 760 ( 
smgrin PGNSP PGUID 12 1 0 0 0 f f f t f s 1 0 210 "2275" _null_ _null_ _null_ _null_ smgrin _null_ _null_ _null_ )); DESCR("I/O"); diff --git a/src/include/catalog/pg_index 7d1d127..cab2826 100644 *** a/src/include/catalog/pg_statistic.h --- b/src/include/catalog/pg_statistic.h *************** *** 98,108 **** CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS --- 98,110 ---- int2 stakind2; int2 stakind3; int2 stakind4; + int2 stakind5; Oid staop1; Oid staop2; Oid staop3; Oid staop4; + Oid staop5; /* * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent *************** *** 115,120 **** CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS --- 117,123 ---- float4 stanumbers2[1]; float4 stanumbers3[1]; float4 stanumbers4[1]; + float4 stanumbers5[1]; /* * Values in these arrays are values of the column's data type. We *************** *** 125,133 **** CATALOG(pg_statistic,2619) BKI_WITHOUT_OIDS anyarray stavalues2; anyarray stavalues3; anyarray stavalues4; } FormData_pg_statistic; ! #define STATISTIC_NUM_SLOTS 4 #undef anyarray --- 128,137 ---- anyarray stavalues2; anyarray stavalues3; anyarray stavalues4; + anyarray stavalues5; } FormData_pg_statistic; ! #define STATISTIC_NUM_SLOTS 5 #undef anyarray *************** *** 143,149 **** typedef FormData_pg_statistic *Form_pg_statistic; * compiler constants for pg_statistic * ---------------- */ ! #define Natts_pg_statistic 22 #define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_staattnum 2 #define Anum_pg_statistic_stainherit 3 --- 147,153 ---- * compiler constants for pg_statistic * ---------------- */ ! #define Natts_pg_statistic 26 #define Anum_pg_statistic_starelid 1 #define Anum_pg_statistic_staattnum 2 #define Anum_pg_statistic_stainherit 3 *************** *** 154,179 **** typedef FormData_pg_statistic *Form_pg_statistic; #define Anum_pg_statistic_stakind2 8 #define Anum_pg_statistic_stakind3 9 #define Anum_pg_statistic_stakind4 10 ! #define Anum_pg_statistic_staop1 11 ! #define Anum_pg_statistic_staop2 12 ! 
#define Anum_pg_statistic_staop3 13 ! #define Anum_pg_statistic_staop4 14 ! #define Anum_pg_statistic_stanumbers1 15 ! #define Anum_pg_statistic_stanumbers2 16 ! #define Anum_pg_statistic_stanumbers3 17 ! #define Anum_pg_statistic_stanumbers4 18 ! #define Anum_pg_statistic_stavalues1 19 ! #define Anum_pg_statistic_stavalues2 20 ! #define Anum_pg_statistic_stavalues3 21 ! #define Anum_pg_statistic_stavalues4 22 /* ! * Currently, three statistical slot "kinds" are defined: most common values, ! * histogram, and correlation. Additional "kinds" will probably appear in ! * future to help cope with non-scalar datatypes. Also, custom data types ! * can define their own "kind" codes by mutual agreement between a custom ! * typanalyze routine and the selectivity estimation functions of the type's ! * operators. * * Code reading the pg_statistic relation should not assume that a particular * data "kind" will appear in any particular slot. Instead, search the --- 158,186 ---- #define Anum_pg_statistic_stakind2 8 #define Anum_pg_statistic_stakind3 9 #define Anum_pg_statistic_stakind4 10 ! #define Anum_pg_statistic_stakind5 11 ! #define Anum_pg_statistic_staop1 12 ! #define Anum_pg_statistic_staop2 13 ! #define Anum_pg_statistic_staop3 14 ! #define Anum_pg_statistic_staop4 15 ! #define Anum_pg_statistic_staop5 16 ! #define Anum_pg_statistic_stanumbers1 17 ! #define Anum_pg_statistic_stanumbers2 18 ! #define Anum_pg_statistic_stanumbers3 19 ! #define Anum_pg_statistic_stanumbers4 20 ! #define Anum_pg_statistic_stanumbers5 21 ! #define Anum_pg_statistic_stavalues1 22 ! #define Anum_pg_statistic_stavalues2 23 ! #define Anum_pg_statistic_stavalues3 24 ! #define Anum_pg_statistic_stavalues4 25 ! #define Anum_pg_statistic_stavalues5 26 /* ! * Currently, five statistical slot "kinds" are defined: most common values, ! * histogram, correlation, most common elements and histogram of distinct ! * element count. Also, custom data types can define their own "kind" codes ! 
* by mutual agreement between a custom typanalyze routine and the selectivity ! * estimation functions of the type's operators. * * Code reading the pg_statistic relation should not assume that a particular * data "kind" will appear in any particular slot. Instead, search the *************** *** 260,263 **** typedef FormData_pg_statistic *Form_pg_statistic; --- 267,280 ---- */ #define STATISTIC_KIND_MCELEM 4 + /* + * A "length histogram" slot resembles a "histogram" slot in structure. + * Instead of actual column values, the population consists of counts of + * distinct elements found within the column values. stavalues contains M + * (>=2) non-null values that divide the non-null column data values into M-1 + * bins of approximately equal population. The first stavalues item is the + * minimum count and the last is the maximum count. + */ + #define STATISTIC_KIND_LENGTH_HISTOGRAM 5 + #endif /* PG_STATISTIC_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index e12efe4..2580a38 100644 *** a/src/include/catalog/pg_type.h --- b/src/include/catalog/pg_type.h *************** *** 353,359 **** DATA(insert OID = 83 ( pg_class PGNSP PGUID -1 f c C f t \054 1259 0 0 record_i DATA(insert OID = 142 ( xml PGNSP PGUID -1 f b U f t \054 0 0 143 xml_in xml_out xml_recv xml_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("XML content"); #define XMLOID 142 ! DATA(insert OID = 143 ( _xml PGNSP PGUID -1 f b A f t \054 0 142 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node_tree_in pg_node_tree_out pg_node_tree_recv pg_node_tree_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); DESCR("string representing an internal node tree"); --- 353,359 ---- DATA(insert OID = 142 ( xml PGNSP PGUID -1 f b U f t \054 0 0 143 xml_in xml_out xml_recv xml_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("XML content"); #define XMLOID 142 ! 
DATA(insert OID = 143 ( _xml PGNSP PGUID -1 f b A f t \054 0 142 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node_tree_in pg_node_tree_out pg_node_tree_recv pg_node_tree_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); DESCR("string representing an internal node tree"); *************** *** 390,396 **** DESCR("geometric polygon '(pt1,...)'"); DATA(insert OID = 628 ( line PGNSP PGUID 32 f b G f t \054 0 701 629 line_in line_out line_recv line_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric line (not implemented)"); #define LINEOID 628 ! DATA(insert OID = 629 ( _line PGNSP PGUID -1 f b A f t \054 0 628 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR(""); /* OIDS 700 - 799 */ --- 390,396 ---- DATA(insert OID = 628 ( line PGNSP PGUID 32 f b G f t \054 0 701 629 line_in line_out line_recv line_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric line (not implemented)"); #define LINEOID 628 ! DATA(insert OID = 629 ( _line PGNSP PGUID -1 f b A f t \054 0 628 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR(""); /* OIDS 700 - 799 */ *************** *** 417,427 **** DESCR(""); DATA(insert OID = 718 ( circle PGNSP PGUID 24 f b G f t \054 0 0 719 circle_in circle_out circle_recv circle_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric circle '(center,radius)'"); #define CIRCLEOID 718 ! DATA(insert OID = 719 ( _circle PGNSP PGUID -1 f b A f t \054 0 718 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 790 ( money PGNSP PGUID 8 FLOAT8PASSBYVAL b N f t \054 0 0 791 cash_in cash_out cash_recv cash_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("monetary amounts, $d,ddd.cc"); #define CASHOID 790 ! 
DATA(insert OID = 791 ( _money PGNSP PGUID -1 f b A f t \054 0 790 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 800 - 899 */ DATA(insert OID = 829 ( macaddr PGNSP PGUID 6 f b U f t \054 0 0 1040 macaddr_in macaddr_out macaddr_recv macaddr_send - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); --- 417,427 ---- DATA(insert OID = 718 ( circle PGNSP PGUID 24 f b G f t \054 0 0 719 circle_in circle_out circle_recv circle_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("geometric circle '(center,radius)'"); #define CIRCLEOID 718 ! DATA(insert OID = 719 ( _circle PGNSP PGUID -1 f b A f t \054 0 718 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 790 ( money PGNSP PGUID 8 FLOAT8PASSBYVAL b N f t \054 0 0 791 cash_in cash_out cash_recv cash_send - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("monetary amounts, $d,ddd.cc"); #define CASHOID 790 ! DATA(insert OID = 791 ( _money PGNSP PGUID -1 f b A f t \054 0 790 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 800 - 899 */ DATA(insert OID = 829 ( macaddr PGNSP PGUID 6 f b U f t \054 0 0 1040 macaddr_in macaddr_out macaddr_recv macaddr_send - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); *************** *** 437,480 **** DESCR("network IP address/netmask, network address"); /* OIDS 900 - 999 */ /* OIDS 1000 - 1099 */ ! DATA(insert OID = 1000 ( _bool PGNSP PGUID -1 f b A f t \054 0 16 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1001 ( _bytea PGNSP PGUID -1 f b A f t \054 0 17 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1002 ( _char PGNSP PGUID -1 f b A f t \054 0 18 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1003 ( _name PGNSP PGUID -1 f b A f t \054 0 19 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1005 ( _int2 PGNSP PGUID -1 f b A f t \054 0 21 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1006 ( _int2vector PGNSP PGUID -1 f b A f t \054 0 22 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1007 ( _int4 PGNSP PGUID -1 f b A f t \054 0 23 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); #define INT4ARRAYOID 1007 ! DATA(insert OID = 1008 ( _regproc PGNSP PGUID -1 f b A f t \054 0 24 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1009 ( _text PGNSP PGUID -1 f b A f t \054 0 25 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); #define TEXTARRAYOID 1009 ! DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1015 ( _varchar PGNSP PGUID -1 f b A f t \054 0 1043 0 array_in array_out array_recv array_send varchartypmodin varchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); ! DATA(insert OID = 1016 ( _int8 PGNSP PGUID -1 f b A f t \054 0 20 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1017 ( _point PGNSP PGUID -1 f b A f t \054 0 600 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1018 ( _lseg PGNSP PGUID -1 f b A f t \054 0 601 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1019 ( _path PGNSP PGUID -1 f b A f t \054 0 602 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1020 ( _box PGNSP PGUID -1 f b A f t \073 0 603 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1021 ( _float4 PGNSP PGUID -1 f b A f t \054 0 700 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); #define FLOAT4ARRAYOID 1021 ! DATA(insert OID = 1022 ( _float8 PGNSP PGUID -1 f b A f t \054 0 701 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1023 ( _abstime PGNSP PGUID -1 f b A f t \054 0 702 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1024 ( _reltime PGNSP PGUID -1 f b A f t \054 0 703 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1025 ( _tinterval PGNSP PGUID -1 f b A f t \054 0 704 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1027 ( _polygon PGNSP PGUID -1 f b A f t \054 0 604 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1033 ( aclitem PGNSP PGUID 12 f b U f t \054 0 0 1034 aclitemin aclitemout - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("access control list"); #define ACLITEMOID 1033 ! DATA(insert OID = 1034 ( _aclitem PGNSP PGUID -1 f b A f t \054 0 1033 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1040 ( _macaddr PGNSP PGUID -1 f b A f t \054 0 829 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1041 ( _inet PGNSP PGUID -1 f b A f t \054 0 869 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 651 ( _cidr PGNSP PGUID -1 f b A f t \054 0 650 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1263 ( _cstring PGNSP PGUID -1 f b A f t \054 0 2275 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); #define CSTRINGARRAYOID 1263 DATA(insert OID = 1042 ( bpchar PGNSP PGUID -1 f b S f t \054 0 0 1014 bpcharin bpcharout bpcharrecv bpcharsend bpchartypmodin bpchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); --- 437,480 ---- /* OIDS 900 - 999 */ /* OIDS 1000 - 1099 */ ! DATA(insert OID = 1000 ( _bool PGNSP PGUID -1 f b A f t \054 0 16 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1001 ( _bytea PGNSP PGUID -1 f b A f t \054 0 17 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1002 ( _char PGNSP PGUID -1 f b A f t \054 0 18 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1003 ( _name PGNSP PGUID -1 f b A f t \054 0 19 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1005 ( _int2 PGNSP PGUID -1 f b A f t \054 0 21 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1006 ( _int2vector PGNSP PGUID -1 f b A f t \054 0 22 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1007 ( _int4 PGNSP PGUID -1 f b A f t \054 0 23 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define INT4ARRAYOID 1007 ! DATA(insert OID = 1008 ( _regproc PGNSP PGUID -1 f b A f t \054 0 24 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1009 ( _text PGNSP PGUID -1 f b A f t \054 0 25 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); #define TEXTARRAYOID 1009 ! DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); ! DATA(insert OID = 1015 ( _varchar PGNSP PGUID -1 f b A f t \054 0 1043 0 array_in array_out array_recv array_send varchartypmodin varchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); ! DATA(insert OID = 1016 ( _int8 PGNSP PGUID -1 f b A f t \054 0 20 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1017 ( _point PGNSP PGUID -1 f b A f t \054 0 600 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1018 ( _lseg PGNSP PGUID -1 f b A f t \054 0 601 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1019 ( _path PGNSP PGUID -1 f b A f t \054 0 602 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1020 ( _box PGNSP PGUID -1 f b A f t \073 0 603 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1021 ( _float4 PGNSP PGUID -1 f b A f t \054 0 700 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define FLOAT4ARRAYOID 1021 ! DATA(insert OID = 1022 ( _float8 PGNSP PGUID -1 f b A f t \054 0 701 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1023 ( _abstime PGNSP PGUID -1 f b A f t \054 0 702 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1024 ( _reltime PGNSP PGUID -1 f b A f t \054 0 703 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1025 ( _tinterval PGNSP PGUID -1 f b A f t \054 0 704 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1027 ( _polygon PGNSP PGUID -1 f b A f t \054 0 604 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1033 ( aclitem PGNSP PGUID 12 f b U f t \054 0 0 1034 aclitemin aclitemout - - - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("access control list"); #define ACLITEMOID 1033 ! DATA(insert OID = 1034 ( _aclitem PGNSP PGUID -1 f b A f t \054 0 1033 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1040 ( _macaddr PGNSP PGUID -1 f b A f t \054 0 829 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1041 ( _inet PGNSP PGUID -1 f b A f t \054 0 869 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 651 ( _cidr PGNSP PGUID -1 f b A f t \054 0 650 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1263 ( _cstring PGNSP PGUID -1 f b A f t \054 0 2275 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define CSTRINGARRAYOID 1263 DATA(insert OID = 1042 ( bpchar PGNSP PGUID -1 f b S f t \054 0 0 1014 bpcharin bpcharout bpcharrecv bpcharsend bpchartypmodin bpchartypmodout - i x f 0 -1 0 100 _null_ _null_ _null_ )); *************** *** 495,528 **** DESCR("time of day"); DATA(insert OID = 1114 ( timestamp PGNSP PGUID 8 FLOAT8PASSBYVAL b D f t \054 0 0 1115 timestamp_in timestamp_out timestamp_recv timestamp_send timestamptypmodin timestamptypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time"); #define TIMESTAMPOID 1114 ! DATA(insert OID = 1115 ( _timestamp PGNSP PGUID -1 f b A f t \054 0 1114 0 array_in array_out array_recv array_send timestamptypmodin timestamptypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1182 ( _date PGNSP PGUID -1 f b A f t \054 0 1082 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1183 ( _time PGNSP PGUID -1 f b A f t \054 0 1083 0 array_in array_out array_recv array_send timetypmodin timetypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1184 ( timestamptz PGNSP PGUID 8 FLOAT8PASSBYVAL b D t t \054 0 0 1185 timestamptz_in timestamptz_out timestamptz_recv timestamptz_send timestamptztypmodin timestamptztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time with time zone"); #define TIMESTAMPTZOID 1184 ! 
DATA(insert OID = 1185 ( _timestamptz PGNSP PGUID -1 f b A f t \054 0 1184 0 array_in array_out array_recv array_send timestamptztypmodin timestamptztypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1186 ( interval PGNSP PGUID 16 f b T t t \054 0 0 1187 interval_in interval_out interval_recv interval_send intervaltypmodin intervaltypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("@ <number> <units>, time interval"); #define INTERVALOID 1186 ! DATA(insert OID = 1187 ( _interval PGNSP PGUID -1 f b A f t \054 0 1186 0 array_in array_out array_recv array_send intervaltypmodin intervaltypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1200 - 1299 */ ! DATA(insert OID = 1231 ( _numeric PGNSP PGUID -1 f b A f t \054 0 1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1266 ( timetz PGNSP PGUID 12 f b D f t \054 0 0 1270 timetz_in timetz_out timetz_recv timetz_send timetztypmodin timetztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("time of day with time zone"); #define TIMETZOID 1266 ! DATA(insert OID = 1270 ( _timetz PGNSP PGUID -1 f b A f t \054 0 1266 0 array_in array_out array_recv array_send timetztypmodin timetztypmodout - d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1500 - 1599 */ DATA(insert OID = 1560 ( bit PGNSP PGUID -1 f b V f t \054 0 0 1561 bit_in bit_out bit_recv bit_send bittypmodin bittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("fixed-length bit string"); #define BITOID 1560 ! DATA(insert OID = 1561 ( _bit PGNSP PGUID -1 f b A f t \054 0 1560 0 array_in array_out array_recv array_send bittypmodin bittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1562 ( varbit PGNSP PGUID -1 f b V t t \054 0 0 1563 varbit_in varbit_out varbit_recv varbit_send varbittypmodin varbittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("variable-length bit string"); #define VARBITOID 1562 ! 
DATA(insert OID = 1563 ( _varbit PGNSP PGUID -1 f b A f t \054 0 1562 0 array_in array_out array_recv array_send varbittypmodin varbittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1600 - 1699 */ --- 495,528 ---- DATA(insert OID = 1114 ( timestamp PGNSP PGUID 8 FLOAT8PASSBYVAL b D f t \054 0 0 1115 timestamp_in timestamp_out timestamp_recv timestamp_send timestamptypmodin timestamptypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time"); #define TIMESTAMPOID 1114 ! DATA(insert OID = 1115 ( _timestamp PGNSP PGUID -1 f b A f t \054 0 1114 0 array_in array_out array_recv array_send timestamptypmodin timestamptypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1182 ( _date PGNSP PGUID -1 f b A f t \054 0 1082 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 1183 ( _time PGNSP PGUID -1 f b A f t \054 0 1083 0 array_in array_out array_recv array_send timetypmodin timetypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1184 ( timestamptz PGNSP PGUID 8 FLOAT8PASSBYVAL b D t t \054 0 0 1185 timestamptz_in timestamptz_out timestamptz_recv timestamptz_send timestamptztypmodin timestamptztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("date and time with time zone"); #define TIMESTAMPTZOID 1184 ! DATA(insert OID = 1185 ( _timestamptz PGNSP PGUID -1 f b A f t \054 0 1184 0 array_in array_out array_recv array_send timestamptztypmodin timestamptztypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1186 ( interval PGNSP PGUID 16 f b T t t \054 0 0 1187 interval_in interval_out interval_recv interval_send intervaltypmodin intervaltypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("@ <number> <units>, time interval"); #define INTERVALOID 1186 ! 
DATA(insert OID = 1187 ( _interval PGNSP PGUID -1 f b A f t \054 0 1186 0 array_in array_out array_recv array_send intervaltypmodin intervaltypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1200 - 1299 */ ! DATA(insert OID = 1231 ( _numeric PGNSP PGUID -1 f b A f t \054 0 1700 0 array_in array_out array_recv array_send numerictypmodin numerictypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1266 ( timetz PGNSP PGUID 12 f b D f t \054 0 0 1270 timetz_in timetz_out timetz_recv timetz_send timetztypmodin timetztypmodout - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("time of day with time zone"); #define TIMETZOID 1266 ! DATA(insert OID = 1270 ( _timetz PGNSP PGUID -1 f b A f t \054 0 1266 0 array_in array_out array_recv array_send timetztypmodin timetztypmodout array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1500 - 1599 */ DATA(insert OID = 1560 ( bit PGNSP PGUID -1 f b V f t \054 0 0 1561 bit_in bit_out bit_recv bit_send bittypmodin bittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("fixed-length bit string"); #define BITOID 1560 ! DATA(insert OID = 1561 ( _bit PGNSP PGUID -1 f b A f t \054 0 1560 0 array_in array_out array_recv array_send bittypmodin bittypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1562 ( varbit PGNSP PGUID -1 f b V t t \054 0 0 1563 varbit_in varbit_out varbit_recv varbit_send varbittypmodin varbittypmodout - i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("variable-length bit string"); #define VARBITOID 1562 ! DATA(insert OID = 1563 ( _varbit PGNSP PGUID -1 f b A f t \054 0 1562 0 array_in array_out array_recv array_send varbittypmodin varbittypmodout array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); /* OIDS 1600 - 1699 */ *************** *** 536,542 **** DESCR("reference to cursor (portal name)"); #define REFCURSOROID 1790 /* OIDS 2200 - 2299 */ ! 
DATA(insert OID = 2201 ( _refcursor PGNSP PGUID -1 f b A f t \054 0 1790 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2202 ( regprocedure PGNSP PGUID 4 t b N f t \054 0 0 2207 regprocedurein regprocedureout regprocedurerecv regproceduresend - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("registered procedure (with args)"); --- 536,542 ---- #define REFCURSOROID 1790 /* OIDS 2200 - 2299 */ ! DATA(insert OID = 2201 ( _refcursor PGNSP PGUID -1 f b A f t \054 0 1790 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2202 ( regprocedure PGNSP PGUID 4 t b N f t \054 0 0 2207 regprocedurein regprocedureout regprocedurerecv regproceduresend - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("registered procedure (with args)"); *************** *** 558,574 **** DATA(insert OID = 2206 ( regtype PGNSP PGUID 4 t b N f t \054 0 0 2211 regty DESCR("registered type"); #define REGTYPEOID 2206 ! DATA(insert OID = 2207 ( _regprocedure PGNSP PGUID -1 f b A f t \054 0 2202 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2208 ( _regoper PGNSP PGUID -1 f b A f t \054 0 2203 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2209 ( _regoperator PGNSP PGUID -1 f b A f t \054 0 2204 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2210 ( _regclass PGNSP PGUID -1 f b A f t \054 0 2205 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 2211 ( _regtype PGNSP PGUID -1 f b A f t \054 0 2206 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); #define REGTYPEARRAYOID 2211 /* uuid */ DATA(insert OID = 2950 ( uuid PGNSP PGUID 16 f b U f t \054 0 0 2951 uuid_in uuid_out uuid_recv uuid_send - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("UUID datatype"); ! DATA(insert OID = 2951 ( _uuid PGNSP PGUID -1 f b A f t \054 0 2950 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); /* text search */ DATA(insert OID = 3614 ( tsvector PGNSP PGUID -1 f b U f t \054 0 0 3643 tsvectorin tsvectorout tsvectorrecv tsvectorsend - - ts_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); --- 558,574 ---- DESCR("registered type"); #define REGTYPEOID 2206 ! DATA(insert OID = 2207 ( _regprocedure PGNSP PGUID -1 f b A f t \054 0 2202 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2208 ( _regoper PGNSP PGUID -1 f b A f t \054 0 2203 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2209 ( _regoperator PGNSP PGUID -1 f b A f t \054 0 2204 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2210 ( _regclass PGNSP PGUID -1 f b A f t \054 0 2205 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 2211 ( _regtype PGNSP PGUID -1 f b A f t \054 0 2206 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define REGTYPEARRAYOID 2211 /* uuid */ DATA(insert OID = 2950 ( uuid PGNSP PGUID 16 f b U f t \054 0 0 2951 uuid_in uuid_out uuid_recv uuid_send - - - c p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("UUID datatype"); ! 
DATA(insert OID = 2951 ( _uuid PGNSP PGUID -1 f b A f t \054 0 2950 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); /* text search */ DATA(insert OID = 3614 ( tsvector PGNSP PGUID -1 f b U f t \054 0 0 3643 tsvectorin tsvectorout tsvectorrecv tsvectorsend - - ts_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); *************** *** 587,622 **** DATA(insert OID = 3769 ( regdictionary PGNSP PGUID 4 t b N f t \054 0 0 3770 reg DESCR("registered text search dictionary"); #define REGDICTIONARYOID 3769 ! DATA(insert OID = 3643 ( _tsvector PGNSP PGUID -1 f b A f t \054 0 3614 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3644 ( _gtsvector PGNSP PGUID -1 f b A f t \054 0 3642 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3645 ( _tsquery PGNSP PGUID -1 f b A f t \054 0 3615 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3735 ( _regconfig PGNSP PGUID -1 f b A f t \054 0 3734 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3770 ( _regdictionary PGNSP PGUID -1 f b A f t \054 0 3769 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2970 ( txid_snapshot PGNSP PGUID -1 f b U f t \054 0 0 2949 txid_snapshot_in txid_snapshot_out txid_snapshot_recv txid_snapshot_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("txid snapshot"); ! 
DATA(insert OID = 2949 ( _txid_snapshot PGNSP PGUID -1 f b A f t \054 0 2970 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); /* range types */ DATA(insert OID = 3904 ( int4range PGNSP PGUID -1 f r R f t \054 0 0 3905 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of integers"); #define INT4RANGEOID 3904 ! DATA(insert OID = 3905 ( _int4range PGNSP PGUID -1 f b A f t \054 0 3904 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3906 ( numrange PGNSP PGUID -1 f r R f t \054 0 0 3907 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of numerics"); ! DATA(insert OID = 3907 ( _numrange PGNSP PGUID -1 f b A f t \054 0 3906 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3908 ( tsrange PGNSP PGUID -1 f r R f t \054 0 0 3909 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps without time zone"); ! DATA(insert OID = 3909 ( _tsrange PGNSP PGUID -1 f b A f t \054 0 3908 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3910 ( tstzrange PGNSP PGUID -1 f r R f t \054 0 0 3911 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps with time zone"); ! DATA(insert OID = 3911 ( _tstzrange PGNSP PGUID -1 f b A f t \054 0 3910 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3912 ( daterange PGNSP PGUID -1 f r R f t \054 0 0 3913 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of dates"); ! 
DATA(insert OID = 3913 ( _daterange PGNSP PGUID -1 f b A f t \054 0 3912 0 array_in array_out array_recv array_send - - - i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3926 ( int8range PGNSP PGUID -1 f r R f t \054 0 0 3927 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of bigints"); ! DATA(insert OID = 3927 ( _int8range PGNSP PGUID -1 f b A f t \054 0 3926 0 array_in array_out array_recv array_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); /* * pseudo-types --- 587,622 ---- DESCR("registered text search dictionary"); #define REGDICTIONARYOID 3769 ! DATA(insert OID = 3643 ( _tsvector PGNSP PGUID -1 f b A f t \054 0 3614 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3644 ( _gtsvector PGNSP PGUID -1 f b A f t \054 0 3642 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3645 ( _tsquery PGNSP PGUID -1 f b A f t \054 0 3615 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3735 ( _regconfig PGNSP PGUID -1 f b A f t \054 0 3734 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! DATA(insert OID = 3770 ( _regdictionary PGNSP PGUID -1 f b A f t \054 0 3769 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 2970 ( txid_snapshot PGNSP PGUID -1 f b U f t \054 0 0 2949 txid_snapshot_in txid_snapshot_out txid_snapshot_recv txid_snapshot_send - - - d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("txid snapshot"); ! 
DATA(insert OID = 2949 ( _txid_snapshot PGNSP PGUID -1 f b A f t \054 0 2970 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* range types */ DATA(insert OID = 3904 ( int4range PGNSP PGUID -1 f r R f t \054 0 0 3905 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of integers"); #define INT4RANGEOID 3904 ! DATA(insert OID = 3905 ( _int4range PGNSP PGUID -1 f b A f t \054 0 3904 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3906 ( numrange PGNSP PGUID -1 f r R f t \054 0 0 3907 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of numerics"); ! DATA(insert OID = 3907 ( _numrange PGNSP PGUID -1 f b A f t \054 0 3906 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3908 ( tsrange PGNSP PGUID -1 f r R f t \054 0 0 3909 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps without time zone"); ! DATA(insert OID = 3909 ( _tsrange PGNSP PGUID -1 f b A f t \054 0 3908 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3910 ( tstzrange PGNSP PGUID -1 f r R f t \054 0 0 3911 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of timestamps with time zone"); ! DATA(insert OID = 3911 ( _tstzrange PGNSP PGUID -1 f b A f t \054 0 3910 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3912 ( daterange PGNSP PGUID -1 f r R f t \054 0 0 3913 range_in range_out range_recv range_send - - range_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of dates"); ! 
DATA(insert OID = 3913 ( _daterange PGNSP PGUID -1 f b A f t \054 0 3912 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 3926 ( int8range PGNSP PGUID -1 f r R f t \054 0 0 3927 range_in range_out range_recv range_send - - range_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("range of bigints"); ! DATA(insert OID = 3927 ( _int8range PGNSP PGUID -1 f b A f t \054 0 3926 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); /* * pseudo-types diff --git a/src/include/commands/vaindex 4526648..e994193 100644 *** a/src/include/commands/vacuum.h --- b/src/include/commands/vacuum.h *************** *** 167,171 **** extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, --- 167,172 ---- /* in commands/analyze.c */ extern void analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy); + extern bool std_typanalyze(VacAttrStats *stats); #endif /* VACUUM_H */ diff --git a/src/include/utils/arrayindex c6d0ad6..4e51491 100644 *** a/src/include/utils/array.h --- b/src/include/utils/array.h *************** *** 289,292 **** extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo, --- 289,302 ---- extern Datum array_agg_transfn(PG_FUNCTION_ARGS); extern Datum array_agg_finalfn(PG_FUNCTION_ARGS); + /* + * prototypes for functions defined in array_selfuncs.c + */ + extern Datum arraysel(PG_FUNCTION_ARGS); + + /* + * prototypes for functions defined in array_typanalyze.c + */ + extern Datum array_typanalyze(PG_FUNCTION_ARGS); + #endif /* ARRAY_H */ diff --git a/src/include/utils/sindex 78eda1b..335b2a0 100644 *** a/src/include/utils/selfuncs.h --- b/src/include/utils/selfuncs.h *************** *** 165,170 **** extern Datum icregexnejoinsel(PG_FUNCTION_ARGS); --- 165,172 ---- extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); + extern Selectivity 
calc_scalararraysel(VariableStatData *vardata, Datum constval, + bool orClause, Oid operator); extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); diff --git a/src/test/regress/expecindex 6e55349..9865b69 100644 *** a/src/test/regress/expected/arrays.out --- b/src/test/regress/expected/arrays.out *************** *** 421,426 **** SELECT 0 || ARRAY[1,2] || 3 AS "{0,1,2,3}"; --- 421,427 ---- {0,1,2,3} (1 row) + ANALYZE array_op_test; SELECT * FROM array_op_test WHERE i @> '{32}' ORDER BY seqno; seqno | i | t -------+---------------------------------+------------------------------------------------------------------------------------------------------------------------------------ diff --git a/src/test/regress/expected/ruleindex 454e1f9..0a9287f 100644 *** a/src/test/regress/expected/rules.out --- b/src/test/regress/expected/rules.out *************** *** 1317,1323 **** SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem pg_statio_user_indexes | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text)); pg_statio_user_sequences | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text)); pg_statio_user_tables | SELECT pg_statio_all_tables.relid, 
pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text)); ! pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stavalues1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stavalues2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stavalues3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stavalues4 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = ANY (ARRAY[1, 4])) THEN s.stanumbers1 WHEN (s.stakind2 = ANY (ARRAY[1, 4])) THEN s.stanumbers2 WHEN (s.stakind3 = ANY (ARRAY[1, 4])) THEN s.stanumbers3 WHEN (s.stakind4 = ANY (ARRAY[1, 4])) THEN s.stanumbers4 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] ELSE NULL::real END AS correlation FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text)); 
pg_tables | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char"); pg_timezone_abbrevs | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst); pg_timezone_names | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst); --- 1317,1323 ---- pg_statio_user_indexes | SELECT pg_statio_all_indexes.relid, pg_statio_all_indexes.indexrelid, pg_statio_all_indexes.schemaname, pg_statio_all_indexes.relname, pg_statio_all_indexes.indexrelname, pg_statio_all_indexes.idx_blks_read, pg_statio_all_indexes.idx_blks_hit FROM pg_statio_all_indexes WHERE ((pg_statio_all_indexes.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_indexes.schemaname !~ '^pg_toast'::text)); pg_statio_user_sequences | SELECT pg_statio_all_sequences.relid, pg_statio_all_sequences.schemaname, pg_statio_all_sequences.relname, pg_statio_all_sequences.blks_read, pg_statio_all_sequences.blks_hit FROM pg_statio_all_sequences WHERE ((pg_statio_all_sequences.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_sequences.schemaname !~ '^pg_toast'::text)); pg_statio_user_tables | SELECT pg_statio_all_tables.relid, pg_statio_all_tables.schemaname, pg_statio_all_tables.relname, pg_statio_all_tables.heap_blks_read, pg_statio_all_tables.heap_blks_hit, pg_statio_all_tables.idx_blks_read, pg_statio_all_tables.idx_blks_hit, pg_statio_all_tables.toast_blks_read, 
pg_statio_all_tables.toast_blks_hit, pg_statio_all_tables.tidx_blks_read, pg_statio_all_tables.tidx_blks_hit FROM pg_statio_all_tables WHERE ((pg_statio_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_statio_all_tables.schemaname !~ '^pg_toast'::text)); ! pg_stats | SELECT n.nspname AS schemaname, c.relname AS tablename, a.attname, s.stainherit AS inherited, s.stanullfrac AS null_frac, s.stawidth AS avg_width, s.stadistinct AS n_distinct, CASE WHEN (s.stakind1 = 1) THEN s.stavalues1 WHEN (s.stakind2 = 1) THEN s.stavalues2 WHEN (s.stakind3 = 1) THEN s.stavalues3 WHEN (s.stakind4 = 1) THEN s.stavalues4 WHEN (s.stakind5 = 1) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_vals, CASE WHEN (s.stakind1 = 1) THEN s.stanumbers1 WHEN (s.stakind2 = 1) THEN s.stanumbers2 WHEN (s.stakind3 = 1) THEN s.stanumbers3 WHEN (s.stakind4 = 1) THEN s.stanumbers4 WHEN (s.stakind5 = 1) THEN s.stanumbers5 ELSE NULL::real[] END AS most_common_freqs, CASE WHEN (s.stakind1 = 2) THEN s.stavalues1 WHEN (s.stakind2 = 2) THEN s.stavalues2 WHEN (s.stakind3 = 2) THEN s.stavalues3 WHEN (s.stakind4 = 2) THEN s.stavalues4 WHEN (s.stakind5 = 2) THEN s.stavalues5 ELSE NULL::anyarray END AS histogram_bounds, CASE WHEN (s.stakind1 = 3) THEN s.stanumbers1[1] WHEN (s.stakind2 = 3) THEN s.stanumbers2[1] WHEN (s.stakind3 = 3) THEN s.stanumbers3[1] WHEN (s.stakind4 = 3) THEN s.stanumbers4[1] WHEN (s.stakind5 = 3) THEN s.stanumbers5[1] ELSE NULL::real END AS correlation, CASE WHEN (s.stakind1 = 4) THEN s.stavalues1 WHEN (s.stakind2 = 4) THEN s.stavalues2 WHEN (s.stakind3 = 4) THEN s.stavalues3 WHEN (s.stakind4 = 4) THEN s.stavalues4 WHEN (s.stakind5 = 4) THEN s.stavalues5 ELSE NULL::anyarray END AS most_common_elems, CASE WHEN (s.stakind1 = 4) THEN s.stanumbers1 WHEN (s.stakind2 = 4) THEN s.stanumbers2 WHEN (s.stakind3 = 4) THEN s.stanumbers3 WHEN (s.stakind4 = 4) THEN s.stanumbers4 WHEN (s.stakind5 = 4) THEN s.stanumbers5 ELSE NULL::real[] END AS 
most_common_elem_freqs, CASE WHEN (s.stakind1 = 5) THEN s.stavalues1 WHEN (s.stakind2 = 5) THEN s.stavalues2 WHEN (s.stakind3 = 5) THEN s.stavalues3 WHEN (s.stakind4 = 5) THEN s.stavalues4 WHEN (s.stakind5 = 5) THEN s.stavalues5 ELSE NULL::anyarray END AS length_histogram_bounds FROM (((pg_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text)); pg_tables | SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, t.spcname AS tablespace, c.relhasindex AS hasindexes, c.relhasrules AS hasrules, c.relhastriggers AS hastriggers FROM ((pg_class c LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace))) WHERE (c.relkind = 'r'::"char"); pg_timezone_abbrevs | SELECT pg_timezone_abbrevs.abbrev, pg_timezone_abbrevs.utc_offset, pg_timezone_abbrevs.is_dst FROM pg_timezone_abbrevs() pg_timezone_abbrevs(abbrev, utc_offset, is_dst); pg_timezone_names | SELECT pg_timezone_names.name, pg_timezone_names.abbrev, pg_timezone_names.utc_offset, pg_timezone_names.is_dst FROM pg_timezone_names() pg_timezone_names(name, abbrev, utc_offset, is_dst); diff --git a/src/test/regress/sql/arrays.sindex 9ea53b1..294b44e 100644 *** a/src/test/regress/sql/arrays.sql --- b/src/test/regress/sql/arrays.sql *************** *** 196,201 **** SELECT ARRAY[[1,2],[3,4]] || ARRAY[5,6] AS "{{1,2},{3,4},{5,6}}"; --- 196,203 ---- SELECT ARRAY[0,0] || ARRAY[1,1] || ARRAY[2,2] AS "{0,0,1,1,2,2}"; SELECT 0 || ARRAY[1,2] || 3 AS "{0,1,2,3}"; + ANALYZE array_op_test; + SELECT * FROM array_op_test WHERE i @> '{32}' ORDER BY seqno; SELECT * FROM array_op_test WHERE i && '{32}' ORDER BY seqno; SELECT * FROM array_op_test WHERE i @> '{17}' ORDER BY seqno;