From 7962a4a3e6b77596fde4df6e4ff97bda19408c52 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 2 Apr 2020 02:57:59 +0200 Subject: [PATCH 4/5] BRIN bloom indexes --- doc/src/sgml/brin.sgml | 215 +++++ doc/src/sgml/ref/create_index.sgml | 31 + src/backend/access/brin/Makefile | 1 + src/backend/access/brin/brin_bloom.c | 980 +++++++++++++++++++++++ src/include/access/brin.h | 2 + src/include/access/brin_internal.h | 4 + src/include/catalog/pg_amop.dat | 165 ++++ src/include/catalog/pg_amproc.dat | 430 ++++++++++ src/include/catalog/pg_opclass.dat | 69 ++ src/include/catalog/pg_opfamily.dat | 36 + src/include/catalog/pg_proc.dat | 20 + src/test/regress/expected/brin_bloom.out | 456 +++++++++++ src/test/regress/expected/opr_sanity.out | 3 +- src/test/regress/expected/psql.out | 3 +- src/test/regress/parallel_schedule | 5 + src/test/regress/serial_schedule | 1 + src/test/regress/sql/brin_bloom.sql | 404 ++++++++++ 17 files changed, 2823 insertions(+), 2 deletions(-) create mode 100644 src/backend/access/brin/brin_bloom.c create mode 100644 src/test/regress/expected/brin_bloom.out create mode 100644 src/test/regress/sql/brin_bloom.sql diff --git a/doc/src/sgml/brin.sgml b/doc/src/sgml/brin.sgml index 46a7d07bf8..900cd47a4c 100644 --- a/doc/src/sgml/brin.sgml +++ b/doc/src/sgml/brin.sgml @@ -132,6 +132,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was + + int8_bloom_ops + bigint + + = + + int8_minmax_ops bigint @@ -183,6 +190,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was |&> + + bytea_bloom_ops + bytea + + = + + bytea_minmax_ops bytea @@ -194,6 +208,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + bpchar_bloom_ops + character + + = + + bpchar_minmax_ops character @@ -205,6 +226,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + char_bloom_ops + "char" + + = + + char_minmax_ops "char" @@ -216,6 +244,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + date_bloom_ops + date + + = + + date_minmax_ops date @@ -227,6 +262,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + float8_bloom_ops + double precision + + = + + float8_minmax_ops double precision @@ -238,6 +280,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + inet_bloom_ops + inet + + = + + inet_minmax_ops inet @@ -261,6 +310,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was << + + int4_bloom_ops + integer + + = + + int4_minmax_ops integer @@ -272,6 +328,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + interval_bloom_ops + interval + + = + + interval_minmax_ops interval @@ -283,6 +346,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + macaddr_bloom_ops + macaddr + + = + + macaddr_minmax_ops macaddr @@ -294,6 +364,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + macaddr8_bloom_ops + macaddr8 + + = + + macaddr8_minmax_ops macaddr8 @@ -305,6 +382,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + name_bloom_ops + name + + = + + name_minmax_ops name @@ -316,6 +400,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + numeric_bloom_ops + numeric + + = + + numeric_minmax_ops numeric @@ -327,6 +418,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + pg_lsn_bloom_ops + pg_lsn + + = + + pg_lsn_minmax_ops pg_lsn @@ -338,6 +436,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + oid_bloom_ops + oid + + = + + oid_minmax_ops oid @@ -369,6 +474,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was >= + + float4_bloom_ops + real + + = + + float4_minmax_ops real @@ -380,6 +492,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + int2_bloom_ops + smallint + + = + + int2_minmax_ops smallint @@ -391,6 +510,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + text_bloom_ops + text + + = + + text_minmax_ops text @@ -413,6 +539,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + timestamp_bloom_ops + timestamp without time zone + + = + + timestamp_minmax_ops timestamp without time zone @@ -424,6 +557,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + timestamptz_bloom_ops + timestamp with time zone + + = + + timestamptz_minmax_ops timestamp with time zone @@ -435,6 +575,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + time_bloom_ops + time without time zone + + = + + time_minmax_ops time without time zone @@ -446,6 +593,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + timetz_bloom_ops + time with time zone + + = + + timetz_minmax_ops time with time zone @@ -457,6 +611,13 @@ LOG: request for BRIN range summarization for index "brin_wi_idx" page 128 was > + + uuid_bloom_ops + uuid + + = + + uuid_minmax_ops uuid @@ -811,6 +972,60 @@ typedef struct BrinOpcInfo function can improve index performance. + + To write an operator class for a data type that implements only an equality + operator and supports hashing, it is possible to use the bloom support procedures + alongside the corresponding operators, as shown in + . + All operator class members (procedures and operators) are mandatory. + + + + Procedure and Support Numbers for Bloom Operator Classes + + + + Operator class member + Object + + + + + Support Procedure 1 + internal function brin_bloom_opcinfo() + + + Support Procedure 2 + internal function brin_bloom_add_value() + + + Support Procedure 3 + internal function brin_bloom_consistent() + + + Support Procedure 4 + internal function brin_bloom_union() + + + Support Procedure 11 + function to compute hash of an element + + + Operator Strategy 1 + operator equal-to + + + +
+ + + Support procedure numbers 1-10 are reserved for the BRIN internal + functions, so the SQL level functions start with number 11. Support + function number 11 is the main function required to build the index. + It should accept one argument with the same data type as the operator class, + and return a hash of the value. + + Both minmax and inclusion operator classes support cross-data-type operators, though with these the dependencies become more complicated. diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index ff87b2d28f..f6380ef8e2 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -555,6 +555,37 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + + + n_distinct_per_range + + + Defines the estimated number of distinct non-null values in the block + range, used by BRIN bloom indexes for sizing of the + Bloom filter. It behaves similarly to n_distinct option + for . When set to a positive value, + each block range is assumed to contain this number of distinct non-null + values. When set to a negative value, which must be greater than or + equal to -1, the number of distinct non-null is assumed linear with + the maximum possible number of tuples in the block range (about 290 + rows per block). The default values is -0.1, and + the minimum number of distinct non-null values is 128. + + + + + + false_positive_rate + + + Defines the desired false positive rate used by BRIN + bloom indexes for sizing of the Bloom filter. The values must be be + greater than 0.0 and smaller than 1.0. The default values is 0.01, + which is 1% false positive rate. + + + + diff --git a/src/backend/access/brin/Makefile b/src/backend/access/brin/Makefile index 468e1e289a..6b56131215 100644 --- a/src/backend/access/brin/Makefile +++ b/src/backend/access/brin/Makefile @@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global OBJS = \ brin.o \ + brin_bloom.o \ brin_inclusion.o \ brin_minmax.o \ brin_pageops.o \ diff --git a/src/backend/access/brin/brin_bloom.c b/src/backend/access/brin/brin_bloom.c new file mode 100644 index 0000000000..b119e4e264 --- /dev/null +++ b/src/backend/access/brin/brin_bloom.c @@ -0,0 +1,980 @@ +/* + * brin_bloom.c + * Implementation of Bloom opclass for BRIN + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * A BRIN opclass summarizing page range into a bloom filter. + * + * Bloom filters allow efficient test whether a given page range contains + * a particular value. Therefore, if we summarize each page range into a + * bloom filter, we can easily and cheaply test wheter it containst values + * we get later. + * + * The index only supports equality operator, similarly to hash indexes. + * BRIN bloom indexes are however much smaller, and support only bitmap + * scans. + * + * Note: Don't confuse this with bloom indexes, implemented in a contrib + * module. That extension implements an entirely new AM, building a bloom + * filter on multiple columns in a single row. This opclass works with an + * existing AM (BRIN) and builds bloom filter on a column. + * + * + * values vs. hashes + * ----------------- + * + * The original column values are not used directly, but are first hashed + * using the regular type-specific hash function, producing a uint32 hash. + * And this hash value is then added to the summary - either stored as is + * (in the sorted mode), or hashed again and added to the bloom filter. + * + * This allows the code to treat all data types (byval/byref/...) the same + * way, with only minimal space requirements. For example we don't need to + * store varlena types differently in the sorted mode, etc. Everything is + * uint32 making it much simpler. + * + * Of course, this assumes the built-in hash function is reasonably good, + * without too many collisions etc. But that does seem to be the case, at + * least based on past experience. After all, the same hash functions are + * used for hash indexes, hash partitioning and so on. + * + * + * sizing the bloom filter + * ----------------------- + * + * Size of a bloom filter depends on the number of distinct values we will + * store in it, and the desired false positive rate. The higher the number + * of distinct values and/or the lower the false positive rate, the larger + * the bloom filter. On the other hand, we want to keep the index as small + * as possible - that's one of the basic advantages of BRIN indexes. + * + * The number of distinct elements (in a page range) depends on the data, + * we can consider it fixed. This simplifies the trade-off to just false + * positive rate vs. size. + * + * At the page range level, false positive rate is a probability the bloom + * filter matches a random value. For the whole index (with sufficiently + * many page ranges) it represents the fraction of the index ranges (and + * thus fraction of the table to be scanned) matching the random value. + * + * Furthermore, the size of the bloom filter is subject to implementation + * limits - it has to fit onto a single index page (8kB by default). As + * the bitmap is inherently random, compression can't reliably help here. + * To reduce the size of a filter (to fit to a page), we have to either + * accept higher false positive rate (undesirable), or reduce the number + * of distinct items to be stored in the filter. We can't quite the input + * data, of course, but we may make the BRIN page ranges smaller - instead + * of the default 128 pages (1MB) we may build index with 16-page ranges, + * or something like that. This does help even for random data sets, as + * the number of rows per heap page is limited (to ~290 with very narrow + * tables, likely ~20 in practice). + * + * Of course, good sizing decisions depend on having the necessary data, + * i.e. number of distinct values in a page range (of a given size) and + * table size (to estimate cost change due to change in false positive + * rate due to having larger index vs. scanning larger indexes). We may + * not have that data - for example when building an index on empty table + * it's not really possible. And for some data we only have estimates for + * the whole table and we can only estimate per-range values (ndistinct). + * + * Another challenge is that while the bloom filter is per-column, it's + * the whole index tuple that has to fit into a page. And for multi-column + * indexes that may include pieces we have no control over (not necessarily + * bloom filters, the other columns may use other BRIN opclasses). So it's + * not entirely clear how to distrubute the space between those columns. + * + * The current logic, implemented in brin_bloom_get_ndistinct, attempts to + * make some basic sizing decisions, based on the table ndistinct estimate. + * + * + * sort vs. hash + * ------------- + * + * As explained in the preceding section, sizing bloom filters correctly is + * difficult in practice. It's also true that bloom filters quickly degrade + * after exceeding the expected number of distinct items. It's therefore + * expected that people will overshoot the parameters a bit (particularly + * the ndistinct per page range), perhaps by a factor of 2 or more. + * + * It's also possible the data set is not uniform - it may have ranges with + * very many distinct items per range, but also ranges with only very few + * distinct items. The bloom filter has to be sized for the more variable + * ranges, making it rather wasteful in the less variable part. + * + * For example, if some page ranges have 1000 distinct values, that means + * about ~1.2kB bloom filter with 1% false positive rate. For page ranges + * that only contain 10 distinct values, that's wasteful, because we might + * store the values (the uint32 hashes) in just 40B. + * + * To address these issues, the opclass stores the raw values directly, and + * only switches to the actual bloom filter after reaching the same space + * requirements. + * + * + * IDENTIFICATION + * src/backend/access/brin/brin_bloom.c + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/brin.h" +#include "access/brin_internal.h" +#include "access/brin_tuple.h" +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/stratnum.h" +#include "catalog/pg_type.h" +#include "catalog/pg_amop.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +#include + +#define BloomEqualStrategyNumber 1 + +/* + * Additional SQL level support functions + * + * Procedure numbers must not use values reserved for BRIN itself; see + * brin_internal.h. + */ +#define BLOOM_MAX_PROCNUMS 1 /* maximum support procs we need */ +#define PROCNUM_HASH 11 /* required */ + +/* + * Subtract this from procnum to obtain index in BloomOpaque arrays + * (Must be equal to minimum of private procnums). + */ +#define PROCNUM_BASE 11 + +/* + * Flags. We only use a single bit for now, to decide whether we're in the + * sorted or hash phase. So we have 15 bits for future use, if needed. The + * filters are expected to be hundreds of bytes, so this is negligible. + */ +#define BLOOM_FLAG_PHASE_HASH 0x0001 + +#define BLOOM_IS_HASHED(f) ((f)->flags & BLOOM_FLAG_PHASE_HASH) +#define BLOOM_IS_SORTED(f) (!BLOOM_IS_HASHED(f)) + +/* + * Number of hashes to accumulate before deduplicating and sorting in the + * sort phase? We want this fairly small to reduce the amount of space and + * also speed-up bsearch lookups. + */ +#define BLOOM_MAX_UNSORTED 32 + +/* + * Storage type for BRIN's reloptions. + */ +typedef struct BloomOptions +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + double nDistinctPerRange; /* number of distinct values per range */ + double falsePositiveRate; /* false positive for bloom filter */ +} BloomOptions; + +/* + * The current min value (16) is somewhat arbitrary, but it's based + * on the fact that the filter header is ~20B alone, which is about + * the same as the filter bitmap for 16 distinct items with 1% false + * positive rate. So by allowing lower values we'd not gain much. In + * any case, the min should not be larger than MaxHeapTuplesPerPage + * (~290), which is the theoretical maximum for single-page ranges. + */ +#define BLOOM_MIN_NDISTINCT_PER_RANGE 16 + +/* + * Used to determine number of distinct items, based on the number of rows + * in a page range. The 10% is somewhat similar to what estimate_num_groups + * does, so we use the same factor here. + */ +#define BLOOM_DEFAULT_NDISTINCT_PER_RANGE -0.1 /* 10% of values */ + +/* + * The 1% value is mostly arbitrary, it just looks nice. + */ +#define BLOOM_DEFAULT_FALSE_POSITIVE_RATE 0.01 /* 1% fp rate */ + +#define BloomGetNDistinctPerRange(opts) \ + ((opts) && (((BloomOptions *) (opts))->nDistinctPerRange != 0) ? \ + (((BloomOptions *) (opts))->nDistinctPerRange) : \ + BLOOM_DEFAULT_NDISTINCT_PER_RANGE) + +#define BloomGetFalsePositiveRate(opts) \ + ((opts) && (((BloomOptions *) (opts))->falsePositiveRate != 0.0) ? \ + (((BloomOptions *) (opts))->falsePositiveRate) : \ + BLOOM_DEFAULT_FALSE_POSITIVE_RATE) + +/* + * Bloom Filter + * + * Represents a bloom filter, built on hashes of the indexed values. That is, + * we compute a uint32 hash of the value, and then store this hash into the + * bloom filter (and compute additional hashes on it). + * + * We use an optimisation that initially we store the uint32 values directly, + * without the extra hashing step. And only later filling the bitmap space, + * we switch to the regular bloom filter mode. + * + * PHASE_SORTED + * + * Initially we copy the uint32 hash into the bitmap, regularly sorting the + * hash values for fast lookup (we keep at most BLOOM_MAX_UNSORTED unsorted + * values). + * + * The idea is that if we only see very few distinct values, we can store + * them in less space compared to the (sparse) bloom filter bitmap. It also + * stores them exactly, although that's not a big advantage as almost-empty + * bloom filter has false positive rate close to zero anyway. + * + * PHASE_HASH + * + * Once we fill the bitmap space in the sorted phase, we switch to the hash + * phase, where we actually use the bloom filter. We treat the uint32 hashes + * as input values, and hash them again with different seeds (to get the k + * hash functions needed for bloom filter). + * + * + * XXX Perhaps we could save a few bytes by using different data types, but + * considering the size of the bitmap, the difference is negligible. + * + * XXX We could also implement "sparse" bloom filters, keeping only the + * bytes that are not entirely 0. That might make the "sorted" phase + * mostly unnecessary. + * + * XXX We can also watch the number of bits set in the bloom filter, and + * then stop using it (and not store the bitmap, to save space) when the + * false positive rate gets too high. + */ +typedef struct BloomFilter +{ + /* varlena header (do not touch directly!) */ + int32 vl_len_; + + /* space for various flags (phase, etc.) */ + uint16 flags; + + /* fields used only in the SORTED phase */ + uint16 nvalues; /* number of hashes stored (total) */ + uint16 nsorted; /* number of hashes in the sorted part */ + + /* fields for the HASHED phase */ + uint8 nhashes; /* number of hash functions */ + uint32 nbits; /* number of bits in the bitmap (size) */ + uint32 nbits_set; /* number of bits set to 1 */ + + /* data of the bloom filter (used both for sorted and hashed phase) */ + char data[FLEXIBLE_ARRAY_MEMBER]; + +} BloomFilter; + +static BloomFilter *bloom_switch_to_hashing(BloomFilter *filter); + + +/* + * bloom_init + * Initialize the Bloom Filter, allocate all the memory. + * + * The filter is initialized with optimal size for ndistinct expected + * values, and requested false positive rate. The filter is stored as + * varlena. + */ +static BloomFilter * +bloom_init(int ndistinct, double false_positive_rate) +{ + Size len; + BloomFilter *filter; + + int m; /* number of bits */ + double k; /* number of hash functions */ + + Assert(ndistinct > 0); + Assert((false_positive_rate > 0) && (false_positive_rate < 1.0)); + + m = ceil((ndistinct * log(false_positive_rate)) / log(1.0 / (pow(2.0, log(2.0))))); + + /* round m to whole bytes */ + m = ((m + 7) / 8) * 8; + + /* + * round(log(2.0) * m / ndistinct), but assume round() may not be + * available on Windows + */ + k = log(2.0) * m / ndistinct; + k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k); + + /* + * Allocate the bloom filter with a minimum size 64B (about 40B in the + * bitmap part). We require space at least for the header. + * + * XXX Maybe the 64B min size is not really needed? + */ + len = Max(offsetof(BloomFilter, data), 64); + + filter = (BloomFilter *) palloc0(len); + + filter->flags = 0; /* implies SORTED phase */ + filter->nhashes = (int) k; + filter->nbits = m; + + SET_VARSIZE(filter, len); + + return filter; +} + +/* simple uint32 comparator, for pg_qsort and bsearch */ +static int +cmp_uint32(const void *a, const void *b) +{ + uint32 *ia = (uint32 *) a; + uint32 *ib = (uint32 *) b; + + if (*ia == *ib) + return 0; + else if (*ia < *ib) + return -1; + else + return 1; +} + +/* + * bloom_compact + * Compact the filter during the 'sorted' phase. + * + * We sort the uint32 hashes and remove duplicates, for two main reasons. + * Firstly, to keep most of the data sorted for bsearch lookups. Secondly, + * we try to save space by removing the duplicates, allowing us to stay + * in the sorted phase a bit longer. + * + * We currently don't repalloc the bitmap, i.e. we don't free the memory + * here - in the worst case we waste space for up to 32 unsorted hashes + * (if all of them are already in the sorted part), so about 128B. We can + * either reduce the number of unsorted items (e.g. to 8 hashes, which + * would mean 32B), or start doing the repalloc. + * + * We do however set the varlena length, to minimize the storage needs. + */ +static void +bloom_compact(BloomFilter *filter) +{ + int i, + nvalues; + Size len; + uint32 *values; + + /* never call compact on filters in HASH phase */ + Assert(BLOOM_IS_SORTED(filter)); + + /* no chance to compact anything */ + if (filter->nvalues == filter->nsorted) + return; + + values = (uint32 *) filter->data; + + /* TODO optimization: sort only the unsorted part, then merge */ + pg_qsort(values, filter->nvalues, sizeof(uint32), cmp_uint32); + + nvalues = 1; + for (i = 1; i < filter->nvalues; i++) + { + /* if different from the last value, keep it */ + if (values[i] != values[nvalues - 1]) + values[nvalues++] = values[i]; + } + + filter->nvalues = nvalues; + filter->nsorted = nvalues; + + len = offsetof(BloomFilter, data) + + (filter->nvalues) * sizeof(uint32); + + SET_VARSIZE(filter, len); +} + +/* + * bloom_add_value + * Add value to the bloom filter. + */ +static BloomFilter * +bloom_add_value(BloomFilter *filter, uint32 value, bool *updated) +{ + int i; + uint32 big_h, h, d; + + /* assume 'not updated' by default */ + Assert(filter); + + /* if we're in the sorted phase, we store the hashes directly */ + if (BLOOM_IS_SORTED(filter)) + { + /* how many uint32 hashes can we fit into the bitmap */ + int maxvalues = filter->nbits / (8 * sizeof(uint32)); + + /* do not overflow the bitmap space or number of unsorted items */ + Assert(filter->nvalues <= maxvalues); + Assert(filter->nvalues - filter->nsorted <= BLOOM_MAX_UNSORTED); + + /* + * In this branch we always update the filter - we either add the + * hash to the unsorted part, or switch the filter to hashing. + */ + if (updated) + *updated = true; + + /* + * If the array is full, or if we reached the limit on unsorted + * items, try to compact the filter first, before attempting to + * add the new value. + */ + if ((filter->nvalues == maxvalues) || + (filter->nvalues - filter->nsorted == BLOOM_MAX_UNSORTED)) + bloom_compact(filter); + + /* + * Can we squeeze one more uint32 hash into the bitmap? Also make + * sure there's enough space in the bytea value first. + */ + if (filter->nvalues < maxvalues) + { + Size len = VARSIZE_ANY(filter); + Size need = offsetof(BloomFilter, data) + + (filter->nvalues + 1) * sizeof(uint32); + + /* + * We don't double the size here, as in the first place we care about + * reducing storage requirements, and the doubling happens automatically + * in memory contexts (so the repalloc should be cheap in most cases). + */ + if (len < need) + { + filter = (BloomFilter *) repalloc(filter, need); + SET_VARSIZE(filter, need); + } + + /* copy the new value into the filter */ + memcpy(&filter->data[filter->nvalues * sizeof(uint32)], + &value, sizeof(uint32)); + + filter->nvalues++; + + /* we're done */ + return filter; + } + + /* can't add any more exact hashes, so switch to hashing */ + filter = bloom_switch_to_hashing(filter); + } + + /* we better be in the hashing phase */ + Assert(BLOOM_IS_HASHED(filter)); + + /* compute the hashes, used for the bloom filter */ + big_h = ((uint32) DatumGetInt64(hash_uint32(value))); + + h = big_h % filter->nbits; + d = big_h % (filter->nbits - 1); + + /* compute the requested number of hashes */ + for (i = 0; i < filter->nhashes; i++) + { + int byte = (h / 8); + int bit = (h % 8); + + /* if the bit is not set, set it and remember we did that */ + if (! (filter->data[byte] & (0x01 << bit))) + { + filter->data[byte] |= (0x01 << bit); + filter->nbits_set++; + if (updated) + *updated = true; + } + + /* next bit */ + h += d++; + if (h >= filter->nbits) + h -= filter->nbits; + + if (d == filter->nbits) + d = 0; + } + + return filter; +} + +/* + * bloom_switch_to_hashing + * Switch the bloom filter from sorted to hashing mode. + */ +static BloomFilter * +bloom_switch_to_hashing(BloomFilter *filter) +{ + int i; + uint32 *values; + Size len; + BloomFilter *newfilter; + + Assert(filter->nbits % 8 == 0); + + /* + * The new filter is allocated with all the memory, directly into + * the HASH phase. + */ + len = offsetof(BloomFilter, data) + (filter->nbits / 8); + + newfilter = (BloomFilter *) palloc0(len); + + newfilter->nhashes = filter->nhashes; + newfilter->nbits = filter->nbits; + newfilter->flags |= BLOOM_FLAG_PHASE_HASH; + + SET_VARSIZE(newfilter, len); + + values = (uint32 *) filter->data; + + for (i = 0; i < filter->nvalues; i++) + /* ignore the return value here, re don't repalloc in hashing mode */ + bloom_add_value(newfilter, values[i], NULL); + + /* free the original filter, return the newly allocated one */ + pfree(filter); + + return newfilter; +} + +/* + * bloom_contains_value + * Check if the bloom filter contains a particular value. + */ +static bool +bloom_contains_value(BloomFilter *filter, uint32 value) +{ + int i; + uint32 big_h, h, d; + + Assert(filter); + + /* in sorted mode we simply search the two arrays (sorted, unsorted) */ + if (BLOOM_IS_SORTED(filter)) + { + int i; + uint32 *values = (uint32 *) filter->data; + + /* first search through the sorted part */ + if ((filter->nsorted > 0) && + (bsearch(&value, values, filter->nsorted, sizeof(uint32), cmp_uint32) != NULL)) + return true; + + /* now search through the unsorted part - linear search */ + for (i = filter->nsorted; i < filter->nvalues; i++) + { + if (value == values[i]) + return true; + } + + /* nothing found */ + return false; + } + + /* now the regular hashing mode */ + Assert(BLOOM_IS_HASHED(filter)); + + big_h = ((uint32) DatumGetInt64(hash_uint32(value))); + + h = big_h % filter->nbits; + d = big_h % (filter->nbits - 1); + + /* compute the requested number of hashes */ + for (i = 0; i < filter->nhashes; i++) + { + int byte = (h / 8); + int bit = (h % 8); + + /* if the bit is not set, the value is not there */ + if (! (filter->data[byte] & (0x01 << bit))) + return false; + + /* next bit */ + h += d++; + if (h >= filter->nbits) + h -= filter->nbits; + + if (d == filter->nbits) + d = 0; + } + + /* all hashes found in bloom filter */ + return true; +} + +typedef struct BloomOpaque +{ + /* + * XXX At this point we only need a single proc (to compute the hash), + * but let's keep the array just like inclusion and minman opclasses, + * for consistency. We may need additional procs in the future. + */ + FmgrInfo extra_procinfos[BLOOM_MAX_PROCNUMS]; + bool extra_proc_missing[BLOOM_MAX_PROCNUMS]; +} BloomOpaque; + +static FmgrInfo *bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, + uint16 procnum); + + +Datum +brin_bloom_opcinfo(PG_FUNCTION_ARGS) +{ + BrinOpcInfo *result; + + /* + * opaque->strategy_procinfos is initialized lazily; here it is set to + * all-uninitialized by palloc0 which sets fn_oid to InvalidOid. + * + * bloom indexes only store the filter as a single BYTEA column + */ + + result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) + + sizeof(BloomOpaque)); + result->oi_nstored = 1; + result->oi_regular_nulls = true; + result->oi_opaque = (BloomOpaque *) + MAXALIGN((char *) result + SizeofBrinOpcInfo(1)); + result->oi_typcache[0] = lookup_type_cache(BYTEAOID, 0); + + PG_RETURN_POINTER(result); +} + +/* + * brin_bloom_get_ndistinct + * Determine the ndistinct value used to size bloom filter. + * + * Tweak the ndistinct value based on the pagesPerRange value. First, + * if it's negative, it's assumed to be relative to maximum number of + * tuples in the range (assuming each page gets MaxHeapTuplesPerPage + * tuples, which is likely a significant over-estimate). We also clamp + * the value, not to over-size the bloom filter unnecessarily. + * + * XXX We can only do this when the pagesPerRange value was supplied. + * If it wasn't, it has to be a read-only access to the index, in which + * case we don't really care. But perhaps we should fall-back to the + * default pagesPerRange value? + * + * XXX We might also fetch info about ndistinct estimate for the column, + * and compute the expected number of distinct values in a range. But + * that may be tricky due to data being sorted in various ways, so it + * seems better to rely on the upper estimate. + */ +static int +brin_bloom_get_ndistinct(BrinDesc *bdesc, BloomOptions *opts) +{ + double ndistinct; + double maxtuples; + BlockNumber pagesPerRange; + + pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index); + ndistinct = BloomGetNDistinctPerRange(opts); + + Assert(BlockNumberIsValid(pagesPerRange)); + + maxtuples = MaxHeapTuplesPerPage * pagesPerRange; + + /* + * Similarly to n_distinct, negative values are relative - in this + * case to maximum number of tuples in the page range (maxtuples). + */ + if (ndistinct < 0) + ndistinct = (-ndistinct) * maxtuples; + + /* + * Positive values are to be used directly, but we still apply a + * couple of safeties no to use unreasonably small bloom filters. + */ + ndistinct = Max(ndistinct, BLOOM_MIN_NDISTINCT_PER_RANGE); + + /* + * And don't use more than the maximum possible number of tuples, + * in the range, which would be entirely wasteful. + */ + ndistinct = Min(ndistinct, maxtuples); + + return (int) ndistinct; +} + +static double +brin_bloom_get_fp_rate(BrinDesc *bdesc, BloomOptions *opts) +{ + return BloomGetFalsePositiveRate(opts); +} + +/* + * Examine the given index tuple (which contains partial status of a certain + * page range) by comparing it to the given value that comes from another heap + * tuple. If the new value is outside the bloom filter specified by the + * existing tuple values, update the index tuple and return true. Otherwise, + * return false and do not modify in this case. + */ +Datum +brin_bloom_add_value(PG_FUNCTION_ARGS) +{ + BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); + BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); + Datum newval = PG_GETARG_DATUM(2); + bool isnull PG_USED_FOR_ASSERTS_ONLY = PG_GETARG_DATUM(3); + BloomOptions *opts = (BloomOptions *) PG_GET_OPCLASS_OPTIONS(); + Oid colloid = PG_GET_COLLATION(); + FmgrInfo *hashFn; + uint32 hashValue; + bool updated = false; + AttrNumber attno; + BloomFilter *filter; + + Assert(!isnull); + + attno = column->bv_attno; + + /* + * If this is the first non-null value, we need to initialize the bloom + * filter. Otherwise just extract the existing bloom filter from BrinValues. + */ + if (column->bv_allnulls) + { + filter = bloom_init(brin_bloom_get_ndistinct(bdesc, opts), + brin_bloom_get_fp_rate(bdesc, opts)); + column->bv_values[0] = PointerGetDatum(filter); + column->bv_allnulls = false; + updated = true; + } + else + filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]); + + /* + * Compute the hash of the new value, using the supplied hash function, + * and then add the hash value to the bloom filter. + */ + hashFn = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH); + + hashValue = DatumGetUInt32(FunctionCall1Coll(hashFn, colloid, newval)); + + filter = bloom_add_value(filter, hashValue, &updated); + + column->bv_values[0] = PointerGetDatum(filter); + + PG_RETURN_BOOL(updated); +} + +/* + * Given an index tuple corresponding to a certain page range and a scan key, + * return whether the scan key is consistent with the index tuple's bloom + * filter. Return true if so, false otherwise. + */ +Datum +brin_bloom_consistent(PG_FUNCTION_ARGS) +{ + BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0); + BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1); + ScanKey *keys = (ScanKey *) PG_GETARG_POINTER(2); + int nkeys = PG_GETARG_INT32(3); + Oid colloid = PG_GET_COLLATION(); + AttrNumber attno; + Datum value; + Datum matches; + FmgrInfo *finfo; + uint32 hashValue; + BloomFilter *filter; + int keyno; + + filter = (BloomFilter *) PG_DETOAST_DATUM(column->bv_values[0]); + + Assert(filter); + + matches = true; + + for (keyno = 0; keyno < nkeys; keyno++) + { + ScanKey key = keys[keyno]; + + /* NULL keys are handled and filtered-out in bringetbitmap */ + Assert(!(key->sk_flags & SK_ISNULL)); + + attno = key->sk_attno; + value = key->sk_argument; + + switch (key->sk_strategy) + { + case BloomEqualStrategyNumber: + + /* + * In the equality case (WHERE col = someval), we want to return + * the current page range if the minimum value in the range <= + * scan key, and the maximum value >= scan key. + */ + finfo = bloom_get_procinfo(bdesc, attno, PROCNUM_HASH); + + hashValue = DatumGetUInt32(FunctionCall1Coll(finfo, colloid, value)); + matches &= bloom_contains_value(filter, hashValue); + + break; + default: + /* shouldn't happen */ + elog(ERROR, "invalid strategy number %d", key->sk_strategy); + matches = 0; + break; + } + + if (!matches) + break; + } + + PG_RETURN_DATUM(matches); +} + +/* + * Given two BrinValues, update the first of them as a union of the summary + * values contained in both. The second one is untouched. + * + * XXX We assume the bloom filters have the same parameters fow now. In the + * future we should have 'can union' function, to decide if we can combine + * two particular bloom filters. + */ +Datum +brin_bloom_union(PG_FUNCTION_ARGS) +{ + BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1); + BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2); + BloomFilter *filter_a; + BloomFilter *filter_b; + + Assert(col_a->bv_attno == col_b->bv_attno); + Assert(!col_a->bv_allnulls && !col_b->bv_allnulls); + + filter_a = (BloomFilter *) PG_DETOAST_DATUM(col_a->bv_values[0]); + filter_b = (BloomFilter *) PG_DETOAST_DATUM(col_b->bv_values[0]); + + /* make sure neither of the bloom filters is NULL */ + Assert(filter_a && filter_b); + Assert(filter_a->nbits == filter_b->nbits); + + /* + * Merging of the filters depends on the phase of both filters. + */ + if (BLOOM_IS_SORTED(filter_b)) + { + /* + * Simply read all items from 'b' and add them to 'a' (the phase of + * 'a' does not really matter). + */ + int i; + uint32 *values = (uint32 *) filter_b->data; + + for (i = 0; i < filter_b->nvalues; i++) + filter_a = bloom_add_value(filter_a, values[i], NULL); + + col_a->bv_values[0] = PointerGetDatum(filter_a); + } + else if (BLOOM_IS_SORTED(filter_a)) + { + /* + * 'b' hashed, 'a' sorted - copy 'b' into 'a' and then add all values + * from 'a' into the new copy. + */ + int i; + BloomFilter *filter_c; + uint32 *values = (uint32 *) filter_a->data; + + filter_c = (BloomFilter *) PG_DETOAST_DATUM(datumCopy(PointerGetDatum(filter_b), false, -1)); + + for (i = 0; i < filter_a->nvalues; i++) + filter_c = bloom_add_value(filter_c, values[i], NULL); + + col_a->bv_values[0] = PointerGetDatum(filter_c); + } + else if (BLOOM_IS_HASHED(filter_a)) + { + /* + * 'b' hashed, 'a' hashed - merge the bitmaps by OR + */ + int i; + int nbytes = (filter_a->nbits + 7) / 8; + + /* we better have "compatible" bloom filters */ + Assert(filter_a->nbits == filter_b->nbits); + Assert(filter_a->nhashes == filter_b->nhashes); + + for (i = 0; i < nbytes; i++) + filter_a->data[i] |= filter_b->data[i]; + } + + PG_RETURN_VOID(); +} + +/* + * Cache and return inclusion opclass support procedure + * + * Return the procedure corresponding to the given function support number + * or null if it is not exists. + */ +static FmgrInfo * +bloom_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum) +{ + BloomOpaque *opaque; + uint16 basenum = procnum - PROCNUM_BASE; + + /* + * We cache these in the opaque struct, to avoid repetitive syscache + * lookups. + */ + opaque = (BloomOpaque *) bdesc->bd_info[attno - 1]->oi_opaque; + + /* + * If we already searched for this proc and didn't find it, don't bother + * searching again. + */ + if (opaque->extra_proc_missing[basenum]) + return NULL; + + if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid) + { + if (RegProcedureIsValid(index_getprocid(bdesc->bd_index, attno, + procnum))) + { + fmgr_info_copy(&opaque->extra_procinfos[basenum], + index_getprocinfo(bdesc->bd_index, attno, procnum), + bdesc->bd_context); + } + else + { + opaque->extra_proc_missing[basenum] = true; + return NULL; + } + } + + return &opaque->extra_procinfos[basenum]; +} + +Datum +brin_bloom_options(PG_FUNCTION_ARGS) +{ + local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0); + + init_local_reloptions(relopts, sizeof(BloomOptions)); + + add_local_real_reloption(relopts, "n_distinct_per_range", + "number of distinct items expected in a BRIN page range", + BLOOM_DEFAULT_NDISTINCT_PER_RANGE, + -1.0, INT_MAX, offsetof(BloomOptions, nDistinctPerRange)); + + add_local_real_reloption(relopts, "false_positive_rate", + "desired false-positive rate for the bloom filters", + BLOOM_DEFAULT_FALSE_POSITIVE_RATE, + 0.001, 1.0, offsetof(BloomOptions, falsePositiveRate)); + + PG_RETURN_VOID(); +} diff --git a/src/include/access/brin.h b/src/include/access/brin.h index 0987878dd2..b02298c0aa 100644 --- a/src/include/access/brin.h +++ b/src/include/access/brin.h @@ -22,6 +22,8 @@ typedef struct BrinOptions int32 vl_len_; /* varlena header (do not touch directly!) */ BlockNumber pagesPerRange; bool autosummarize; + double nDistinctPerRange; /* number of distinct values per range */ + double falsePositiveRate; /* false positive for bloom filter */ } BrinOptions; diff --git a/src/include/access/brin_internal.h b/src/include/access/brin_internal.h index 7c4f3da0a0..e3c9d47503 100644 --- a/src/include/access/brin_internal.h +++ b/src/include/access/brin_internal.h @@ -58,6 +58,10 @@ typedef struct BrinDesc /* total number of Datum entries that are stored on-disk for all columns */ int bd_totalstored; + /* parameters for sizing bloom filter (BRIN bloom opclasses) */ + double bd_nDistinctPerRange; + double bd_falsePositiveRange; + /* per-column info; bd_tupdesc->natts entries long */ BrinOpcInfo *bd_info[FLEXIBLE_ARRAY_MEMBER]; } BrinDesc; diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat index 1dfb6fd373..af6891e348 100644 --- a/src/include/catalog/pg_amop.dat +++ b/src/include/catalog/pg_amop.dat @@ -1705,6 +1705,11 @@ amoprighttype => 'bytea', amopstrategy => '5', amopopr => '>(bytea,bytea)', amopmethod => 'brin' }, +# bloom bytea +{ amopfamily => 'brin/bytea_bloom_ops', amoplefttype => 'bytea', + amoprighttype => 'bytea', amopstrategy => '1', amopopr => '=(bytea,bytea)', + amopmethod => 'brin' }, + # minmax "char" { amopfamily => 'brin/char_minmax_ops', amoplefttype => 'char', amoprighttype => 'char', amopstrategy => '1', amopopr => '<(char,char)', @@ -1722,6 +1727,11 @@ amoprighttype => 'char', amopstrategy => '5', amopopr => '>(char,char)', amopmethod => 'brin' }, +# bloom "char" +{ amopfamily => 'brin/char_bloom_ops', amoplefttype => 'char', + amoprighttype => 'char', amopstrategy => '1', amopopr => '=(char,char)', + amopmethod => 'brin' }, + # minmax name { amopfamily => 'brin/name_minmax_ops', amoplefttype => 'name', amoprighttype => 'name', amopstrategy => '1', amopopr => '<(name,name)', @@ -1739,6 +1749,11 @@ amoprighttype => 'name', amopstrategy => '5', amopopr => '>(name,name)', amopmethod => 'brin' }, +# bloom name +{ amopfamily => 'brin/name_bloom_ops', amoplefttype => 'name', + amoprighttype => 'name', amopstrategy => '1', amopopr => '=(name,name)', + amopmethod => 'brin' }, + # minmax integer { amopfamily => 'brin/integer_minmax_ops', amoplefttype => 'int8', @@ -1885,6 +1900,44 @@ amoprighttype => 'int8', amopstrategy => '5', amopopr => '>(int4,int8)', amopmethod => 'brin' }, +# bloom integer + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int8', + amoprighttype => 'int8', amopstrategy => '1', amopopr => '=(int8,int8)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int8', + amoprighttype => 'int2', amopstrategy => '1', amopopr => '=(int8,int2)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int8', + amoprighttype => 'int4', amopstrategy => '1', amopopr => '=(int8,int4)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int2', + amoprighttype => 'int2', amopstrategy => '1', amopopr => '=(int2,int2)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int2', + amoprighttype => 'int8', amopstrategy => '1', amopopr => '=(int2,int8)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int2', + amoprighttype => 'int4', amopstrategy => '1', amopopr => '=(int2,int4)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int4', + amoprighttype => 'int4', amopstrategy => '1', amopopr => '=(int4,int4)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int4', + amoprighttype => 'int2', amopstrategy => '1', amopopr => '=(int4,int2)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/integer_bloom_ops', amoplefttype => 'int4', + amoprighttype => 'int8', amopstrategy => '1', amopopr => '=(int4,int8)', + amopmethod => 'brin' }, + # minmax text { amopfamily => 'brin/text_minmax_ops', amoplefttype => 'text', amoprighttype => 'text', amopstrategy => '1', amopopr => '<(text,text)', @@ -1902,6 +1955,11 @@ amoprighttype => 'text', amopstrategy => '5', amopopr => '>(text,text)', amopmethod => 'brin' }, +# bloom text +{ amopfamily => 'brin/text_bloom_ops', amoplefttype => 'text', + amoprighttype => 'text', amopstrategy => '1', amopopr => '=(text,text)', + amopmethod => 'brin' }, + # minmax oid { amopfamily => 'brin/oid_minmax_ops', amoplefttype => 'oid', amoprighttype => 'oid', amopstrategy => '1', amopopr => '<(oid,oid)', @@ -1919,6 +1977,11 @@ amoprighttype => 'oid', amopstrategy => '5', amopopr => '>(oid,oid)', amopmethod => 'brin' }, +# bloom oid +{ amopfamily => 'brin/oid_bloom_ops', amoplefttype => 'oid', + amoprighttype => 'oid', amopstrategy => '1', amopopr => '=(oid,oid)', + amopmethod => 'brin' }, + # minmax tid { amopfamily => 'brin/tid_minmax_ops', amoplefttype => 'tid', amoprighttype => 'tid', amopstrategy => '1', amopopr => '<(tid,tid)', @@ -2002,6 +2065,20 @@ amoprighttype => 'float8', amopstrategy => '5', amopopr => '>(float8,float8)', amopmethod => 'brin' }, +# bloom float +{ amopfamily => 'brin/float_bloom_ops', amoplefttype => 'float4', + amoprighttype => 'float4', amopstrategy => '1', amopopr => '=(float4,float4)', + amopmethod => 'brin' }, +{ amopfamily => 'brin/float_bloom_ops', amoplefttype => 'float4', + amoprighttype => 'float8', amopstrategy => '1', amopopr => '=(float4,float8)', + amopmethod => 'brin' }, +{ amopfamily => 'brin/float_bloom_ops', amoplefttype => 'float8', + amoprighttype => 'float4', amopstrategy => '1', amopopr => '=(float8,float4)', + amopmethod => 'brin' }, +{ amopfamily => 'brin/float_bloom_ops', amoplefttype => 'float8', + amoprighttype => 'float8', amopstrategy => '1', amopopr => '=(float8,float8)', + amopmethod => 'brin' }, + # minmax macaddr { amopfamily => 'brin/macaddr_minmax_ops', amoplefttype => 'macaddr', amoprighttype => 'macaddr', amopstrategy => '1', @@ -2019,6 +2096,11 @@ amoprighttype => 'macaddr', amopstrategy => '5', amopopr => '>(macaddr,macaddr)', amopmethod => 'brin' }, +# bloom macaddr +{ amopfamily => 'brin/macaddr_bloom_ops', amoplefttype => 'macaddr', + amoprighttype => 'macaddr', amopstrategy => '1', + amopopr => '=(macaddr,macaddr)', amopmethod => 'brin' }, + # minmax macaddr8 { amopfamily => 'brin/macaddr8_minmax_ops', amoplefttype => 'macaddr8', amoprighttype => 'macaddr8', amopstrategy => '1', @@ -2036,6 +2118,11 @@ amoprighttype => 'macaddr8', amopstrategy => '5', amopopr => '>(macaddr8,macaddr8)', amopmethod => 'brin' }, +# bloom macaddr8 +{ amopfamily => 'brin/macaddr8_bloom_ops', amoplefttype => 'macaddr8', + amoprighttype => 'macaddr8', amopstrategy => '1', + amopopr => '=(macaddr8,macaddr8)', amopmethod => 'brin' }, + # minmax inet { amopfamily => 'brin/network_minmax_ops', amoplefttype => 'inet', amoprighttype => 'inet', amopstrategy => '1', amopopr => '<(inet,inet)', @@ -2053,6 +2140,11 @@ amoprighttype => 'inet', amopstrategy => '5', amopopr => '>(inet,inet)', amopmethod => 'brin' }, +# bloom inet +{ amopfamily => 'brin/network_bloom_ops', amoplefttype => 'inet', + amoprighttype => 'inet', amopstrategy => '1', amopopr => '=(inet,inet)', + amopmethod => 'brin' }, + # inclusion inet { amopfamily => 'brin/network_inclusion_ops', amoplefttype => 'inet', amoprighttype => 'inet', amopstrategy => '3', amopopr => '&&(inet,inet)', @@ -2090,6 +2182,11 @@ amoprighttype => 'bpchar', amopstrategy => '5', amopopr => '>(bpchar,bpchar)', amopmethod => 'brin' }, +# bloom character +{ amopfamily => 'brin/bpchar_bloom_ops', amoplefttype => 'bpchar', + amoprighttype => 'bpchar', amopstrategy => '1', amopopr => '=(bpchar,bpchar)', + amopmethod => 'brin' }, + # minmax time without time zone { amopfamily => 'brin/time_minmax_ops', amoplefttype => 'time', amoprighttype => 'time', amopstrategy => '1', amopopr => '<(time,time)', @@ -2107,6 +2204,11 @@ amoprighttype => 'time', amopstrategy => '5', amopopr => '>(time,time)', amopmethod => 'brin' }, +# bloom time without time zone +{ amopfamily => 'brin/time_bloom_ops', amoplefttype => 'time', + amoprighttype => 'time', amopstrategy => '1', amopopr => '=(time,time)', + amopmethod => 'brin' }, + # minmax datetime (date, timestamp, timestamptz) { amopfamily => 'brin/datetime_minmax_ops', amoplefttype => 'timestamp', @@ -2253,6 +2355,44 @@ amoprighttype => 'timestamptz', amopstrategy => '5', amopopr => '>(timestamptz,timestamptz)', amopmethod => 'brin' }, +# bloom datetime (date, timestamp, timestamptz) + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamp', + amoprighttype => 'timestamp', amopstrategy => '1', + amopopr => '=(timestamp,timestamp)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamp', + amoprighttype => 'date', amopstrategy => '1', amopopr => '=(timestamp,date)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamp', + amoprighttype => 'timestamptz', amopstrategy => '1', + amopopr => '=(timestamp,timestamptz)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'date', + amoprighttype => 'date', amopstrategy => '1', amopopr => '=(date,date)', + amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'date', + amoprighttype => 'timestamp', amopstrategy => '1', + amopopr => '=(date,timestamp)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'date', + amoprighttype => 'timestamptz', amopstrategy => '1', + amopopr => '=(date,timestamptz)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamptz', + amoprighttype => 'date', amopstrategy => '1', + amopopr => '=(timestamptz,date)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamptz', + amoprighttype => 'timestamp', amopstrategy => '1', + amopopr => '=(timestamptz,timestamp)', amopmethod => 'brin' }, + +{ amopfamily => 'brin/datetime_bloom_ops', amoplefttype => 'timestamptz', + amoprighttype => 'timestamptz', amopstrategy => '1', + amopopr => '=(timestamptz,timestamptz)', amopmethod => 'brin' }, + # minmax interval { amopfamily => 'brin/interval_minmax_ops', amoplefttype => 'interval', amoprighttype => 'interval', amopstrategy => '1', @@ -2270,6 +2410,11 @@ amoprighttype => 'interval', amopstrategy => '5', amopopr => '>(interval,interval)', amopmethod => 'brin' }, +# bloom interval +{ amopfamily => 'brin/interval_bloom_ops', amoplefttype => 'interval', + amoprighttype => 'interval', amopstrategy => '1', + amopopr => '=(interval,interval)', amopmethod => 'brin' }, + # minmax time with time zone { amopfamily => 'brin/timetz_minmax_ops', amoplefttype => 'timetz', amoprighttype => 'timetz', amopstrategy => '1', amopopr => '<(timetz,timetz)', @@ -2287,6 +2432,11 @@ amoprighttype => 'timetz', amopstrategy => '5', amopopr => '>(timetz,timetz)', amopmethod => 'brin' }, +# bloom time with time zone +{ amopfamily => 'brin/timetz_bloom_ops', amoplefttype => 'timetz', + amoprighttype => 'timetz', amopstrategy => '1', amopopr => '=(timetz,timetz)', + amopmethod => 'brin' }, + # minmax bit { amopfamily => 'brin/bit_minmax_ops', amoplefttype => 'bit', amoprighttype => 'bit', amopstrategy => '1', amopopr => '<(bit,bit)', @@ -2338,6 +2488,11 @@ amoprighttype => 'numeric', amopstrategy => '5', amopopr => '>(numeric,numeric)', amopmethod => 'brin' }, +# bloom numeric +{ amopfamily => 'brin/numeric_bloom_ops', amoplefttype => 'numeric', + amoprighttype => 'numeric', amopstrategy => '1', + amopopr => '=(numeric,numeric)', amopmethod => 'brin' }, + # minmax uuid { amopfamily => 'brin/uuid_minmax_ops', amoplefttype => 'uuid', amoprighttype => 'uuid', amopstrategy => '1', amopopr => '<(uuid,uuid)', @@ -2355,6 +2510,11 @@ amoprighttype => 'uuid', amopstrategy => '5', amopopr => '>(uuid,uuid)', amopmethod => 'brin' }, +# bloom uuid +{ amopfamily => 'brin/uuid_bloom_ops', amoplefttype => 'uuid', + amoprighttype => 'uuid', amopstrategy => '1', amopopr => '=(uuid,uuid)', + amopmethod => 'brin' }, + # inclusion range types { amopfamily => 'brin/range_inclusion_ops', amoplefttype => 'anyrange', amoprighttype => 'anyrange', amopstrategy => '1', @@ -2416,6 +2576,11 @@ amoprighttype => 'pg_lsn', amopstrategy => '5', amopopr => '>(pg_lsn,pg_lsn)', amopmethod => 'brin' }, +# bloom pg_lsn +{ amopfamily => 'brin/pg_lsn_bloom_ops', amoplefttype => 'pg_lsn', + amoprighttype => 'pg_lsn', amopstrategy => '1', amopopr => '=(pg_lsn,pg_lsn)', + amopmethod => 'brin' }, + # inclusion box { amopfamily => 'brin/box_inclusion_ops', amoplefttype => 'box', amoprighttype => 'box', amopstrategy => '1', amopopr => '<<(box,box)', diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 37b580883f..c24a80545a 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -770,6 +770,24 @@ { amprocfamily => 'brin/bytea_minmax_ops', amproclefttype => 'bytea', amprocrighttype => 'bytea', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom bytea +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/bytea_bloom_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '11', amproc => 'hashvarlena' }, + # minmax "char" { amprocfamily => 'brin/char_minmax_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '1', @@ -783,6 +801,24 @@ { amprocfamily => 'brin/char_minmax_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom "char" +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/char_bloom_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '11', amproc => 'hashchar' }, + # minmax name { amprocfamily => 'brin/name_minmax_ops', amproclefttype => 'name', amprocrighttype => 'name', amprocnum => '1', @@ -796,6 +832,24 @@ { amprocfamily => 'brin/name_minmax_ops', amproclefttype => 'name', amprocrighttype => 'name', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom name +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/name_bloom_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '11', amproc => 'hashname' }, + # minmax integer: int2, int4, int8 { amprocfamily => 'brin/integer_minmax_ops', amproclefttype => 'int8', amprocrighttype => 'int8', amprocnum => '1', @@ -897,6 +951,58 @@ { amprocfamily => 'brin/integer_minmax_ops', amproclefttype => 'int4', amprocrighttype => 'int2', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom integer: int2, int4, int8 +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '11', amproc => 'hashint8' }, + +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '11', amproc => 'hashint2' }, + +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/integer_bloom_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '11', amproc => 'hashint4' }, + # minmax text { amprocfamily => 'brin/text_minmax_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '1', @@ -910,6 +1016,24 @@ { amprocfamily => 'brin/text_minmax_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom text +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/text_bloom_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '11', amproc => 'hashtext' }, + # minmax oid { amprocfamily => 'brin/oid_minmax_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '1', amproc => 'brin_minmax_opcinfo' }, @@ -922,6 +1046,23 @@ { amprocfamily => 'brin/oid_minmax_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom oid +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '1', amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/oid_bloom_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '11', amproc => 'hashoid' }, + # minmax tid { amprocfamily => 'brin/tid_minmax_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '1', amproc => 'brin_minmax_opcinfo' }, @@ -984,6 +1125,45 @@ amprocrighttype => 'float4', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom float +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float4', + amprocrighttype => 'float4', amprocnum => '11', + amproc => 'hashfloat4' }, + +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/float_bloom_ops', amproclefttype => 'float8', + amprocrighttype => 'float8', amprocnum => '11', + amproc => 'hashfloat8' }, + # minmax macaddr { amprocfamily => 'brin/macaddr_minmax_ops', amproclefttype => 'macaddr', amprocrighttype => 'macaddr', amprocnum => '1', @@ -998,6 +1178,26 @@ amprocrighttype => 'macaddr', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom macaddr +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/macaddr_bloom_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '11', + amproc => 'hashmacaddr' }, + # minmax macaddr8 { amprocfamily => 'brin/macaddr8_minmax_ops', amproclefttype => 'macaddr8', amprocrighttype => 'macaddr8', amprocnum => '1', @@ -1012,6 +1212,26 @@ amprocrighttype => 'macaddr8', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom macaddr8 +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/macaddr8_bloom_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '11', + amproc => 'hashmacaddr8' }, + # minmax inet { amprocfamily => 'brin/network_minmax_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '1', @@ -1025,6 +1245,24 @@ { amprocfamily => 'brin/network_minmax_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom inet +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/network_bloom_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '11', amproc => 'hashinet' }, + # inclusion inet { amprocfamily => 'brin/network_inclusion_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '1', @@ -1059,6 +1297,26 @@ amprocrighttype => 'bpchar', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom character +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/bpchar_bloom_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '11', + amproc => 'hashchar' }, + # minmax time without time zone { amprocfamily => 'brin/time_minmax_ops', amproclefttype => 'time', amprocrighttype => 'time', amprocnum => '1', @@ -1072,6 +1330,24 @@ { amprocfamily => 'brin/time_minmax_ops', amproclefttype => 'time', amprocrighttype => 'time', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom time without time zone +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/time_bloom_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '11', amproc => 'time_hash' }, + # minmax datetime (date, timestamp, timestamptz) { amprocfamily => 'brin/datetime_minmax_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamp', amprocnum => '1', @@ -1179,6 +1455,62 @@ amprocrighttype => 'timestamptz', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom datetime (date, timestamp, timestamptz) +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '11', + amproc => 'timestamp_hash' }, + +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '11', + amproc => 'timestamp_hash' }, + +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/datetime_bloom_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '11', amproc => 'hashint4' }, + # minmax interval { amprocfamily => 'brin/interval_minmax_ops', amproclefttype => 'interval', amprocrighttype => 'interval', amprocnum => '1', @@ -1193,6 +1525,26 @@ amprocrighttype => 'interval', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom interval +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/interval_bloom_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '11', + amproc => 'interval_hash' }, + # minmax time with time zone { amprocfamily => 'brin/timetz_minmax_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '1', @@ -1207,6 +1559,26 @@ amprocrighttype => 'timetz', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom time with time zone +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/timetz_bloom_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum => '11', + amproc => 'timetz_hash' }, + # minmax bit { amprocfamily => 'brin/bit_minmax_ops', amproclefttype => 'bit', amprocrighttype => 'bit', amprocnum => '1', amproc => 'brin_minmax_opcinfo' }, @@ -1247,6 +1619,26 @@ amprocrighttype => 'numeric', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom numeric +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/numeric_bloom_ops', amproclefttype => 'numeric', + amprocrighttype => 'numeric', amprocnum => '11', + amproc => 'hash_numeric' }, + # minmax uuid { amprocfamily => 'brin/uuid_minmax_ops', amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '1', @@ -1260,6 +1652,24 @@ { amprocfamily => 'brin/uuid_minmax_ops', amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom uuid +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '4', amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/uuid_bloom_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '11', amproc => 'uuid_hash' }, + # inclusion range types { amprocfamily => 'brin/range_inclusion_ops', amproclefttype => 'anyrange', amprocrighttype => 'anyrange', amprocnum => '1', @@ -1295,6 +1705,26 @@ amprocrighttype => 'pg_lsn', amprocnum => '4', amproc => 'brin_minmax_union' }, +# bloom pg_lsn +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '1', + amproc => 'brin_bloom_opcinfo' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '2', + amproc => 'brin_bloom_add_value' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '3', + amproc => 'brin_bloom_consistent' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '4', + amproc => 'brin_bloom_union' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '5', + amproc => 'brin_bloom_options' }, +{ amprocfamily => 'brin/pg_lsn_bloom_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '11', + amproc => 'pg_lsn_hash' }, + # inclusion box { amprocfamily => 'brin/box_inclusion_ops', amproclefttype => 'box', amprocrighttype => 'box', amprocnum => '1', diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index f2342bb328..16d6111ab6 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -257,67 +257,127 @@ { opcmethod => 'brin', opcname => 'bytea_minmax_ops', opcfamily => 'brin/bytea_minmax_ops', opcintype => 'bytea', opckeytype => 'bytea' }, +{ opcmethod => 'brin', opcname => 'bytea_bloom_ops', + opcfamily => 'brin/bytea_bloom_ops', opcintype => 'bytea', + opckeytype => 'bytea', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'char_minmax_ops', opcfamily => 'brin/char_minmax_ops', opcintype => 'char', opckeytype => 'char' }, +{ opcmethod => 'brin', opcname => 'char_bloom_ops', + opcfamily => 'brin/char_bloom_ops', opcintype => 'char', + opckeytype => 'char', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'name_minmax_ops', opcfamily => 'brin/name_minmax_ops', opcintype => 'name', opckeytype => 'name' }, +{ opcmethod => 'brin', opcname => 'name_bloom_ops', + opcfamily => 'brin/name_bloom_ops', opcintype => 'name', + opckeytype => 'name', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'int8_minmax_ops', opcfamily => 'brin/integer_minmax_ops', opcintype => 'int8', opckeytype => 'int8' }, +{ opcmethod => 'brin', opcname => 'int8_bloom_ops', + opcfamily => 'brin/integer_bloom_ops', opcintype => 'int8', + opckeytype => 'int8', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'int2_minmax_ops', opcfamily => 'brin/integer_minmax_ops', opcintype => 'int2', opckeytype => 'int2' }, +{ opcmethod => 'brin', opcname => 'int2_bloom_ops', + opcfamily => 'brin/integer_bloom_ops', opcintype => 'int2', + opckeytype => 'int2', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'int4_minmax_ops', opcfamily => 'brin/integer_minmax_ops', opcintype => 'int4', opckeytype => 'int4' }, +{ opcmethod => 'brin', opcname => 'int4_bloom_ops', + opcfamily => 'brin/integer_bloom_ops', opcintype => 'int4', + opckeytype => 'int4', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'text_minmax_ops', opcfamily => 'brin/text_minmax_ops', opcintype => 'text', opckeytype => 'text' }, +{ opcmethod => 'brin', opcname => 'text_bloom_ops', + opcfamily => 'brin/text_bloom_ops', opcintype => 'text', + opckeytype => 'text', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'oid_minmax_ops', opcfamily => 'brin/oid_minmax_ops', opcintype => 'oid', opckeytype => 'oid' }, +{ opcmethod => 'brin', opcname => 'oid_bloom_ops', + opcfamily => 'brin/oid_bloom_ops', opcintype => 'oid', + opckeytype => 'oid', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'tid_minmax_ops', opcfamily => 'brin/tid_minmax_ops', opcintype => 'tid', opckeytype => 'tid' }, { opcmethod => 'brin', opcname => 'float4_minmax_ops', opcfamily => 'brin/float_minmax_ops', opcintype => 'float4', opckeytype => 'float4' }, +{ opcmethod => 'brin', opcname => 'float4_bloom_ops', + opcfamily => 'brin/float_bloom_ops', opcintype => 'float4', + opckeytype => 'float4', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'float8_minmax_ops', opcfamily => 'brin/float_minmax_ops', opcintype => 'float8', opckeytype => 'float8' }, +{ opcmethod => 'brin', opcname => 'float8_bloom_ops', + opcfamily => 'brin/float_bloom_ops', opcintype => 'float8', + opckeytype => 'float8', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'macaddr_minmax_ops', opcfamily => 'brin/macaddr_minmax_ops', opcintype => 'macaddr', opckeytype => 'macaddr' }, +{ opcmethod => 'brin', opcname => 'macaddr_bloom_ops', + opcfamily => 'brin/macaddr_bloom_ops', opcintype => 'macaddr', + opckeytype => 'macaddr', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'macaddr8_minmax_ops', opcfamily => 'brin/macaddr8_minmax_ops', opcintype => 'macaddr8', opckeytype => 'macaddr8' }, +{ opcmethod => 'brin', opcname => 'macaddr8_bloom_ops', + opcfamily => 'brin/macaddr8_bloom_ops', opcintype => 'macaddr8', + opckeytype => 'macaddr8', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'inet_minmax_ops', opcfamily => 'brin/network_minmax_ops', opcintype => 'inet', opcdefault => 'f', opckeytype => 'inet' }, +{ opcmethod => 'brin', opcname => 'inet_bloom_ops', + opcfamily => 'brin/network_bloom_ops', opcintype => 'inet', + opcdefault => 'f', opckeytype => 'inet', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'inet_inclusion_ops', opcfamily => 'brin/network_inclusion_ops', opcintype => 'inet', opckeytype => 'inet' }, { opcmethod => 'brin', opcname => 'bpchar_minmax_ops', opcfamily => 'brin/bpchar_minmax_ops', opcintype => 'bpchar', opckeytype => 'bpchar' }, +{ opcmethod => 'brin', opcname => 'bpchar_bloom_ops', + opcfamily => 'brin/bpchar_bloom_ops', opcintype => 'bpchar', + opckeytype => 'bpchar', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'time_minmax_ops', opcfamily => 'brin/time_minmax_ops', opcintype => 'time', opckeytype => 'time' }, +{ opcmethod => 'brin', opcname => 'time_bloom_ops', + opcfamily => 'brin/time_bloom_ops', opcintype => 'time', + opckeytype => 'time', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'date_minmax_ops', opcfamily => 'brin/datetime_minmax_ops', opcintype => 'date', opckeytype => 'date' }, +{ opcmethod => 'brin', opcname => 'date_bloom_ops', + opcfamily => 'brin/datetime_bloom_ops', opcintype => 'date', + opckeytype => 'date', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'timestamp_minmax_ops', opcfamily => 'brin/datetime_minmax_ops', opcintype => 'timestamp', opckeytype => 'timestamp' }, +{ opcmethod => 'brin', opcname => 'timestamp_bloom_ops', + opcfamily => 'brin/datetime_bloom_ops', opcintype => 'timestamp', + opckeytype => 'timestamp', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'timestamptz_minmax_ops', opcfamily => 'brin/datetime_minmax_ops', opcintype => 'timestamptz', opckeytype => 'timestamptz' }, +{ opcmethod => 'brin', opcname => 'timestamptz_bloom_ops', + opcfamily => 'brin/datetime_bloom_ops', opcintype => 'timestamptz', + opckeytype => 'timestamptz', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'interval_minmax_ops', opcfamily => 'brin/interval_minmax_ops', opcintype => 'interval', opckeytype => 'interval' }, +{ opcmethod => 'brin', opcname => 'interval_bloom_ops', + opcfamily => 'brin/interval_bloom_ops', opcintype => 'interval', + opckeytype => 'interval', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'timetz_minmax_ops', opcfamily => 'brin/timetz_minmax_ops', opcintype => 'timetz', opckeytype => 'timetz' }, +{ opcmethod => 'brin', opcname => 'timetz_bloom_ops', + opcfamily => 'brin/timetz_bloom_ops', opcintype => 'timetz', + opckeytype => 'timetz', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'bit_minmax_ops', opcfamily => 'brin/bit_minmax_ops', opcintype => 'bit', opckeytype => 'bit' }, { opcmethod => 'brin', opcname => 'varbit_minmax_ops', @@ -326,18 +386,27 @@ { opcmethod => 'brin', opcname => 'numeric_minmax_ops', opcfamily => 'brin/numeric_minmax_ops', opcintype => 'numeric', opckeytype => 'numeric' }, +{ opcmethod => 'brin', opcname => 'numeric_bloom_ops', + opcfamily => 'brin/numeric_bloom_ops', opcintype => 'numeric', + opckeytype => 'numeric', opcdefault => 'f' }, # no brin opclass for record, anyarray { opcmethod => 'brin', opcname => 'uuid_minmax_ops', opcfamily => 'brin/uuid_minmax_ops', opcintype => 'uuid', opckeytype => 'uuid' }, +{ opcmethod => 'brin', opcname => 'uuid_bloom_ops', + opcfamily => 'brin/uuid_bloom_ops', opcintype => 'uuid', + opckeytype => 'uuid', opcdefault => 'f' }, { opcmethod => 'brin', opcname => 'range_inclusion_ops', opcfamily => 'brin/range_inclusion_ops', opcintype => 'anyrange', opckeytype => 'anyrange' }, { opcmethod => 'brin', opcname => 'pg_lsn_minmax_ops', opcfamily => 'brin/pg_lsn_minmax_ops', opcintype => 'pg_lsn', opckeytype => 'pg_lsn' }, +{ opcmethod => 'brin', opcname => 'pg_lsn_bloom_ops', + opcfamily => 'brin/pg_lsn_bloom_ops', opcintype => 'pg_lsn', + opckeytype => 'pg_lsn', opcdefault => 'f' }, # no brin opclass for enum, tsvector, tsquery, jsonb diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat index cf0fb325b3..7f733aeab1 100644 --- a/src/include/catalog/pg_opfamily.dat +++ b/src/include/catalog/pg_opfamily.dat @@ -180,50 +180,86 @@ opfmethod => 'gin', opfname => 'jsonb_path_ops' }, { oid => '4054', opfmethod => 'brin', opfname => 'integer_minmax_ops' }, +{ oid => '8100', + opfmethod => 'brin', opfname => 'integer_bloom_ops' }, { oid => '4055', opfmethod => 'brin', opfname => 'numeric_minmax_ops' }, { oid => '4056', opfmethod => 'brin', opfname => 'text_minmax_ops' }, +{ oid => '8101', + opfmethod => 'brin', opfname => 'text_bloom_ops' }, +{ oid => '8102', + opfmethod => 'brin', opfname => 'numeric_bloom_ops' }, { oid => '4058', opfmethod => 'brin', opfname => 'timetz_minmax_ops' }, +{ oid => '8103', + opfmethod => 'brin', opfname => 'timetz_bloom_ops' }, { oid => '4059', opfmethod => 'brin', opfname => 'datetime_minmax_ops' }, +{ oid => '8104', + opfmethod => 'brin', opfname => 'datetime_bloom_ops' }, { oid => '4062', opfmethod => 'brin', opfname => 'char_minmax_ops' }, +{ oid => '8105', + opfmethod => 'brin', opfname => 'char_bloom_ops' }, { oid => '4064', opfmethod => 'brin', opfname => 'bytea_minmax_ops' }, +{ oid => '8106', + opfmethod => 'brin', opfname => 'bytea_bloom_ops' }, { oid => '4065', opfmethod => 'brin', opfname => 'name_minmax_ops' }, +{ oid => '8107', + opfmethod => 'brin', opfname => 'name_bloom_ops' }, { oid => '4068', opfmethod => 'brin', opfname => 'oid_minmax_ops' }, +{ oid => '8108', + opfmethod => 'brin', opfname => 'oid_bloom_ops' }, { oid => '4069', opfmethod => 'brin', opfname => 'tid_minmax_ops' }, { oid => '4070', opfmethod => 'brin', opfname => 'float_minmax_ops' }, +{ oid => '8109', + opfmethod => 'brin', opfname => 'float_bloom_ops' }, { oid => '4074', opfmethod => 'brin', opfname => 'macaddr_minmax_ops' }, +{ oid => '8110', + opfmethod => 'brin', opfname => 'macaddr_bloom_ops' }, { oid => '4109', opfmethod => 'brin', opfname => 'macaddr8_minmax_ops' }, +{ oid => '8111', + opfmethod => 'brin', opfname => 'macaddr8_bloom_ops' }, { oid => '4075', opfmethod => 'brin', opfname => 'network_minmax_ops' }, { oid => '4102', opfmethod => 'brin', opfname => 'network_inclusion_ops' }, +{ oid => '8112', + opfmethod => 'brin', opfname => 'network_bloom_ops' }, { oid => '4076', opfmethod => 'brin', opfname => 'bpchar_minmax_ops' }, +{ oid => '8113', + opfmethod => 'brin', opfname => 'bpchar_bloom_ops' }, { oid => '4077', opfmethod => 'brin', opfname => 'time_minmax_ops' }, +{ oid => '8114', + opfmethod => 'brin', opfname => 'time_bloom_ops' }, { oid => '4078', opfmethod => 'brin', opfname => 'interval_minmax_ops' }, +{ oid => '8115', + opfmethod => 'brin', opfname => 'interval_bloom_ops' }, { oid => '4079', opfmethod => 'brin', opfname => 'bit_minmax_ops' }, { oid => '4080', opfmethod => 'brin', opfname => 'varbit_minmax_ops' }, { oid => '4081', opfmethod => 'brin', opfname => 'uuid_minmax_ops' }, +{ oid => '8116', + opfmethod => 'brin', opfname => 'uuid_bloom_ops' }, { oid => '4103', opfmethod => 'brin', opfname => 'range_inclusion_ops' }, { oid => '4082', opfmethod => 'brin', opfname => 'pg_lsn_minmax_ops' }, +{ oid => '8117', + opfmethod => 'brin', opfname => 'pg_lsn_bloom_ops' }, { oid => '4104', opfmethod => 'brin', opfname => 'box_inclusion_ops' }, { oid => '5000', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index e5798da368..132a274bbc 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8096,6 +8096,26 @@ proargtypes => 'internal internal internal', prosrc => 'brin_inclusion_union' }, +# BRIN bloom +{ oid => '8118', descr => 'BRIN bloom support', + proname => 'brin_bloom_opcinfo', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'brin_bloom_opcinfo' }, +{ oid => '8119', descr => 'BRIN bloom support', + proname => 'brin_bloom_add_value', prorettype => 'bool', + proargtypes => 'internal internal internal internal', + prosrc => 'brin_bloom_add_value' }, +{ oid => '8120', descr => 'BRIN bloom support', + proname => 'brin_bloom_consistent', prorettype => 'bool', + proargtypes => 'internal internal internal int4', + prosrc => 'brin_bloom_consistent' }, +{ oid => '8121', descr => 'BRIN bloom support', + proname => 'brin_bloom_union', prorettype => 'bool', + proargtypes => 'internal internal internal', + prosrc => 'brin_bloom_union' }, +{ oid => '8122', descr => 'BRIN bloom support', + proname => 'brin_bloom_options', prorettype => 'void', proisstrict => 'f', + proargtypes => 'internal', prosrc => 'brin_bloom_options' }, + # userlock replacements { oid => '2880', descr => 'obtain exclusive advisory lock', proname => 'pg_advisory_lock', provolatile => 'v', proparallel => 'r', diff --git a/src/test/regress/expected/brin_bloom.out b/src/test/regress/expected/brin_bloom.out new file mode 100644 index 0000000000..d58a4a483c --- /dev/null +++ b/src/test/regress/expected/brin_bloom.out @@ -0,0 +1,456 @@ +CREATE TABLE brintest_bloom (byteacol bytea, + charcol "char", + namecol name, + int8col bigint, + int2col smallint, + int4col integer, + textcol text, + oidcol oid, + float4col real, + float8col double precision, + macaddrcol macaddr, + inetcol inet, + cidrcol cidr, + bpcharcol character, + datecol date, + timecol time without time zone, + timestampcol timestamp without time zone, + timestamptzcol timestamp with time zone, + intervalcol interval, + timetzcol time with time zone, + numericcol numeric, + uuidcol uuid, + lsncol pg_lsn +) WITH (fillfactor=10); +INSERT INTO brintest_bloom SELECT + repeat(stringu1, 8)::bytea, + substr(stringu1, 1, 1)::"char", + stringu1::name, 142857 * tenthous, + thousand, + twothousand, + repeat(stringu1, 8), + unique1::oid, + (four + 1.0)/(hundred+1), + odd::float8 / (tenthous + 1), + format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr, + inet '10.2.3.4/24' + tenthous, + cidr '10.2.3/24' + tenthous, + substr(stringu1, 1, 1)::bpchar, + date '1995-08-15' + tenthous, + time '01:20:30' + thousand * interval '18.5 second', + timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours', + timestamptz '1972-10-10 03:00' + thousand * interval '1 hour', + justify_days(justify_hours(tenthous * interval '12 minutes')), + timetz '01:30:20+02' + hundred * interval '15 seconds', + tenthous::numeric(36,30) * fivethous * even / (hundred + 1), + format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid, + format('%s/%s%s', odd, even, tenthous)::pg_lsn +FROM tenk1 ORDER BY unique2 LIMIT 100; +-- throw in some NULL's and different values +INSERT INTO brintest_bloom (inetcol, cidrcol) SELECT + inet 'fe80::6e40:8ff:fea9:8c46' + tenthous, + cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous +FROM tenk1 ORDER BY thousand, tenthous LIMIT 25; +-- test bloom specific index options +-- ndistinct must be >= -1.0 +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(n_distinct_per_range = -1.1) +); +ERROR: value -1.1 out of bounds for option "n_distinct_per_range" +DETAIL: Valid values are between "-1.000000" and "2147483647.000000". +-- false_positive_rate must be between 0.001 and 1.0 +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(false_positive_rate = 0.0) +); +ERROR: value 0.0 out of bounds for option "false_positive_rate" +DETAIL: Valid values are between "0.001000" and "1.000000". +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(false_positive_rate = 1.01) +); +ERROR: value 1.01 out of bounds for option "false_positive_rate" +DETAIL: Valid values are between "0.001000" and "1.000000". +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops, + charcol char_bloom_ops, + namecol name_bloom_ops, + int8col int8_bloom_ops, + int2col int2_bloom_ops, + int4col int4_bloom_ops, + textcol text_bloom_ops, + oidcol oid_bloom_ops, + float4col float4_bloom_ops, + float8col float8_bloom_ops, + macaddrcol macaddr_bloom_ops, + inetcol inet_bloom_ops, + cidrcol inet_bloom_ops, + bpcharcol bpchar_bloom_ops, + datecol date_bloom_ops, + timecol time_bloom_ops, + timestampcol timestamp_bloom_ops, + timestamptzcol timestamptz_bloom_ops, + intervalcol interval_bloom_ops, + timetzcol timetz_bloom_ops, + numericcol numeric_bloom_ops, + uuidcol uuid_bloom_ops, + lsncol pg_lsn_bloom_ops +) with (pages_per_range = 1); +CREATE TABLE brinopers_bloom (colname name, typ text, + op text[], value text[], matches int[], + check (cardinality(op) = cardinality(value)), + check (cardinality(op) = cardinality(matches))); +INSERT INTO brinopers_bloom VALUES + ('byteacol', 'bytea', + '{=}', + '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}', + '{1}'), + ('charcol', '"char"', + '{=}', + '{M}', + '{6}'), + ('namecol', 'name', + '{=}', + '{MAAAAA}', + '{2}'), + ('int2col', 'int2', + '{=}', + '{800}', + '{1}'), + ('int2col', 'int4', + '{=}', + '{800}', + '{1}'), + ('int2col', 'int8', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int2', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int4', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int8', + '{=}', + '{800}', + '{1}'), + ('int8col', 'int8', + '{=}', + '{1257141600}', + '{1}'), + ('textcol', 'text', + '{=}', + '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}', + '{1}'), + ('oidcol', 'oid', + '{=}', + '{8800}', + '{1}'), + ('float4col', 'float4', + '{=}', + '{1}', + '{4}'), + ('float4col', 'float8', + '{=}', + '{1}', + '{4}'), + ('float8col', 'float4', + '{=}', + '{0}', + '{1}'), + ('float8col', 'float8', + '{=}', + '{0}', + '{1}'), + ('macaddrcol', 'macaddr', + '{=}', + '{2c:00:2d:00:16:00}', + '{2}'), + ('inetcol', 'inet', + '{=}', + '{10.2.14.231/24}', + '{1}'), + ('inetcol', 'cidr', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('cidrcol', 'inet', + '{=}', + '{10.2.14/24}', + '{2}'), + ('cidrcol', 'inet', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('cidrcol', 'cidr', + '{=}', + '{10.2.14/24}', + '{2}'), + ('cidrcol', 'cidr', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('bpcharcol', 'bpchar', + '{=}', + '{W}', + '{6}'), + ('datecol', 'date', + '{=}', + '{2009-12-01}', + '{1}'), + ('timecol', 'time', + '{=}', + '{02:28:57}', + '{1}'), + ('timestampcol', 'timestamp', + '{=}', + '{1964-03-24 19:26:45}', + '{1}'), + ('timestampcol', 'timestamptz', + '{=}', + '{1964-03-24 19:26:45}', + '{1}'), + ('timestamptzcol', 'timestamptz', + '{=}', + '{1972-10-19 09:00:00-07}', + '{1}'), + ('intervalcol', 'interval', + '{=}', + '{1 mons 13 days 12:24}', + '{1}'), + ('timetzcol', 'timetz', + '{=}', + '{01:35:50+02}', + '{2}'), + ('numericcol', 'numeric', + '{=}', + '{2268164.347826086956521739130434782609}', + '{1}'), + ('uuidcol', 'uuid', + '{=}', + '{52225222-5222-5222-5222-522252225222}', + '{1}'), + ('lsncol', 'pg_lsn', + '{=, IS, IS NOT}', + '{44/455222, NULL, NULL}', + '{1, 25, 100}'); +DO $x$ +DECLARE + r record; + r2 record; + cond text; + idx_ctids tid[]; + ss_ctids tid[]; + count int; + plan_ok bool; + plan_line text; +BEGIN + FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers_bloom, unnest(op) WITH ORDINALITY AS oper LOOP + + -- prepare the condition + IF r.value IS NULL THEN + cond := format('%I %s %L', r.colname, r.oper, r.value); + ELSE + cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ); + END IF; + + -- run the query using the brin index + SET enable_seqscan = 0; + SET enable_bitmapscan = 1; + + plan_ok := false; + FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP + IF plan_line LIKE '%Bitmap Heap Scan on brintest_bloom%' THEN + plan_ok := true; + END IF; + END LOOP; + IF NOT plan_ok THEN + RAISE WARNING 'did not get bitmap indexscan plan for %', r; + END IF; + + EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) + INTO idx_ctids; + + -- run the query using a seqscan + SET enable_seqscan = 1; + SET enable_bitmapscan = 0; + + plan_ok := false; + FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP + IF plan_line LIKE '%Seq Scan on brintest_bloom%' THEN + plan_ok := true; + END IF; + END LOOP; + IF NOT plan_ok THEN + RAISE WARNING 'did not get seqscan plan for %', r; + END IF; + + EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) + INTO ss_ctids; + + -- make sure both return the same results + count := array_length(idx_ctids, 1); + + IF NOT (count = array_length(ss_ctids, 1) AND + idx_ctids @> ss_ctids AND + idx_ctids <@ ss_ctids) THEN + -- report the results of each scan to make the differences obvious + RAISE WARNING 'something not right in %: count %', r, count; + SET enable_seqscan = 1; + SET enable_bitmapscan = 0; + FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP + RAISE NOTICE 'seqscan: %', r2; + END LOOP; + + SET enable_seqscan = 0; + SET enable_bitmapscan = 1; + FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP + RAISE NOTICE 'bitmapscan: %', r2; + END LOOP; + END IF; + + -- make sure we found expected number of matches + IF count != r.matches THEN RAISE WARNING 'unexpected number of results % for %', count, r; END IF; + END LOOP; +END; +$x$; +RESET enable_seqscan; +RESET enable_bitmapscan; +INSERT INTO brintest_bloom SELECT + repeat(stringu1, 42)::bytea, + substr(stringu1, 1, 1)::"char", + stringu1::name, 142857 * tenthous, + thousand, + twothousand, + repeat(stringu1, 42), + unique1::oid, + (four + 1.0)/(hundred+1), + odd::float8 / (tenthous + 1), + format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr, + inet '10.2.3.4' + tenthous, + cidr '10.2.3/24' + tenthous, + substr(stringu1, 1, 1)::bpchar, + date '1995-08-15' + tenthous, + time '01:20:30' + thousand * interval '18.5 second', + timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours', + timestamptz '1972-10-10 03:00' + thousand * interval '1 hour', + justify_days(justify_hours(tenthous * interval '12 minutes')), + timetz '01:30:20' + hundred * interval '15 seconds', + tenthous::numeric(36,30) * fivethous * even / (hundred + 1), + format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid, + format('%s/%s%s', odd, even, tenthous)::pg_lsn +FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; +SELECT brin_desummarize_range('brinidx_bloom', 0); + brin_desummarize_range +------------------------ + +(1 row) + +VACUUM brintest_bloom; -- force a summarization cycle in brinidx +UPDATE brintest_bloom SET int8col = int8col * int4col; +UPDATE brintest_bloom SET textcol = '' WHERE textcol IS NOT NULL; +-- Tests for brin_summarize_new_values +SELECT brin_summarize_new_values('brintest_bloom'); -- error, not an index +ERROR: "brintest_bloom" is not an index +SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index +ERROR: "tenk1_unique1" is not a BRIN index +SELECT brin_summarize_new_values('brinidx_bloom'); -- ok, no change expected + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Tests for brin_desummarize_range +SELECT brin_desummarize_range('brinidx_bloom', -1); -- error, invalid range +ERROR: block number out of range: -1 +SELECT brin_desummarize_range('brinidx_bloom', 0); + brin_desummarize_range +------------------------ + +(1 row) + +SELECT brin_desummarize_range('brinidx_bloom', 0); + brin_desummarize_range +------------------------ + +(1 row) + +SELECT brin_desummarize_range('brinidx_bloom', 100000000); + brin_desummarize_range +------------------------ + +(1 row) + +-- Test brin_summarize_range +CREATE TABLE brin_summarize_bloom ( + value int +) WITH (fillfactor=10, autovacuum_enabled=false); +CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2); +-- Fill a few pages +DO $$ +DECLARE curtid tid; +BEGIN + LOOP + INSERT INTO brin_summarize_bloom VALUES (1) RETURNING ctid INTO curtid; + EXIT WHEN curtid > tid '(2, 0)'; + END LOOP; +END; +$$; +-- summarize one range +SELECT brin_summarize_range('brin_summarize_bloom_idx', 0); + brin_summarize_range +---------------------- + 0 +(1 row) + +-- nothing: already summarized +SELECT brin_summarize_range('brin_summarize_bloom_idx', 1); + brin_summarize_range +---------------------- + 0 +(1 row) + +-- summarize one range +SELECT brin_summarize_range('brin_summarize_bloom_idx', 2); + brin_summarize_range +---------------------- + 1 +(1 row) + +-- nothing: page doesn't exist in table +SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967295); + brin_summarize_range +---------------------- + 0 +(1 row) + +-- invalid block number values +SELECT brin_summarize_range('brin_summarize_bloom_idx', -1); +ERROR: block number out of range: -1 +SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967296); +ERROR: block number out of range: 4294967296 +-- test brin cost estimates behave sanely based on correlation of values +CREATE TABLE brin_test_bloom (a INT, b INT); +INSERT INTO brin_test_bloom SELECT x/100,x%100 FROM generate_series(1,10000) x(x); +CREATE INDEX brin_test_bloom_a_idx ON brin_test_bloom USING brin (a) WITH (pages_per_range = 2); +CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2); +VACUUM ANALYZE brin_test_bloom; +-- Ensure brin index is used when columns are perfectly correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE a = 1; + QUERY PLAN +-------------------------------------------------- + Bitmap Heap Scan on brin_test_bloom + Recheck Cond: (a = 1) + -> Bitmap Index Scan on brin_test_bloom_a_idx + Index Cond: (a = 1) +(4 rows) + +-- Ensure brin index is not used when values are not correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE b = 1; + QUERY PLAN +----------------------------- + Seq Scan on brin_test_bloom + Filter: (b = 1) +(2 rows) + diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 27056d70d3..729d5c04c1 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -2009,6 +2009,7 @@ ORDER BY 1, 2, 3; 2742 | 16 | @@ 3580 | 1 | < 3580 | 1 | << + 3580 | 1 | = 3580 | 2 | &< 3580 | 2 | <= 3580 | 3 | && @@ -2072,7 +2073,7 @@ ORDER BY 1, 2, 3; 4000 | 26 | >> 4000 | 27 | >>= 4000 | 28 | ^@ -(125 rows) +(126 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 0b990fd814..5d11d7a946 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -4929,8 +4929,9 @@ List of access methods List of operator classes AM | Input type | Storage type | Operator class | Default? ------+------------+--------------+----------------+---------- + brin | oid | | oid_bloom_ops | no brin | oid | | oid_minmax_ops | yes -(1 row) +(2 rows) \dAf spgist List of operator families diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 95f1925072..620b55152d 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -75,6 +75,11 @@ test: select_into select_distinct select_distinct_on select_implicit select_havi # ---------- test: brin gin gist spgist privileges init_privs security_label collate matview lock replica_identity rowsecurity object_address tablesample groupingsets drop_operator password identity generated join_hash +# ---------- +# Additional BRIN tests +# ---------- +test: brin_bloom + # ---------- # Another group of parallel tests # ---------- diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 8ba4136220..1d59f2d90f 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -107,6 +107,7 @@ test: delete test: namespace test: prepared_xacts test: brin +test: brin_bloom test: gin test: gist test: spgist diff --git a/src/test/regress/sql/brin_bloom.sql b/src/test/regress/sql/brin_bloom.sql new file mode 100644 index 0000000000..abd5a33deb --- /dev/null +++ b/src/test/regress/sql/brin_bloom.sql @@ -0,0 +1,404 @@ +CREATE TABLE brintest_bloom (byteacol bytea, + charcol "char", + namecol name, + int8col bigint, + int2col smallint, + int4col integer, + textcol text, + oidcol oid, + float4col real, + float8col double precision, + macaddrcol macaddr, + inetcol inet, + cidrcol cidr, + bpcharcol character, + datecol date, + timecol time without time zone, + timestampcol timestamp without time zone, + timestamptzcol timestamp with time zone, + intervalcol interval, + timetzcol time with time zone, + numericcol numeric, + uuidcol uuid, + lsncol pg_lsn +) WITH (fillfactor=10); + +INSERT INTO brintest_bloom SELECT + repeat(stringu1, 8)::bytea, + substr(stringu1, 1, 1)::"char", + stringu1::name, 142857 * tenthous, + thousand, + twothousand, + repeat(stringu1, 8), + unique1::oid, + (four + 1.0)/(hundred+1), + odd::float8 / (tenthous + 1), + format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr, + inet '10.2.3.4/24' + tenthous, + cidr '10.2.3/24' + tenthous, + substr(stringu1, 1, 1)::bpchar, + date '1995-08-15' + tenthous, + time '01:20:30' + thousand * interval '18.5 second', + timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours', + timestamptz '1972-10-10 03:00' + thousand * interval '1 hour', + justify_days(justify_hours(tenthous * interval '12 minutes')), + timetz '01:30:20+02' + hundred * interval '15 seconds', + tenthous::numeric(36,30) * fivethous * even / (hundred + 1), + format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid, + format('%s/%s%s', odd, even, tenthous)::pg_lsn +FROM tenk1 ORDER BY unique2 LIMIT 100; + +-- throw in some NULL's and different values +INSERT INTO brintest_bloom (inetcol, cidrcol) SELECT + inet 'fe80::6e40:8ff:fea9:8c46' + tenthous, + cidr 'fe80::6e40:8ff:fea9:8c46' + tenthous +FROM tenk1 ORDER BY thousand, tenthous LIMIT 25; + +-- test bloom specific index options +-- ndistinct must be >= -1.0 +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(n_distinct_per_range = -1.1) +); +-- false_positive_rate must be between 0.001 and 1.0 +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(false_positive_rate = 0.0) +); +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops(false_positive_rate = 1.01) +); + +CREATE INDEX brinidx_bloom ON brintest_bloom USING brin ( + byteacol bytea_bloom_ops, + charcol char_bloom_ops, + namecol name_bloom_ops, + int8col int8_bloom_ops, + int2col int2_bloom_ops, + int4col int4_bloom_ops, + textcol text_bloom_ops, + oidcol oid_bloom_ops, + float4col float4_bloom_ops, + float8col float8_bloom_ops, + macaddrcol macaddr_bloom_ops, + inetcol inet_bloom_ops, + cidrcol inet_bloom_ops, + bpcharcol bpchar_bloom_ops, + datecol date_bloom_ops, + timecol time_bloom_ops, + timestampcol timestamp_bloom_ops, + timestamptzcol timestamptz_bloom_ops, + intervalcol interval_bloom_ops, + timetzcol timetz_bloom_ops, + numericcol numeric_bloom_ops, + uuidcol uuid_bloom_ops, + lsncol pg_lsn_bloom_ops +) with (pages_per_range = 1); + +CREATE TABLE brinopers_bloom (colname name, typ text, + op text[], value text[], matches int[], + check (cardinality(op) = cardinality(value)), + check (cardinality(op) = cardinality(matches))); + +INSERT INTO brinopers_bloom VALUES + ('byteacol', 'bytea', + '{=}', + '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}', + '{1}'), + ('charcol', '"char"', + '{=}', + '{M}', + '{6}'), + ('namecol', 'name', + '{=}', + '{MAAAAA}', + '{2}'), + ('int2col', 'int2', + '{=}', + '{800}', + '{1}'), + ('int2col', 'int4', + '{=}', + '{800}', + '{1}'), + ('int2col', 'int8', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int2', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int4', + '{=}', + '{800}', + '{1}'), + ('int4col', 'int8', + '{=}', + '{800}', + '{1}'), + ('int8col', 'int8', + '{=}', + '{1257141600}', + '{1}'), + ('textcol', 'text', + '{=}', + '{BNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAABNAAAA}', + '{1}'), + ('oidcol', 'oid', + '{=}', + '{8800}', + '{1}'), + ('float4col', 'float4', + '{=}', + '{1}', + '{4}'), + ('float4col', 'float8', + '{=}', + '{1}', + '{4}'), + ('float8col', 'float4', + '{=}', + '{0}', + '{1}'), + ('float8col', 'float8', + '{=}', + '{0}', + '{1}'), + ('macaddrcol', 'macaddr', + '{=}', + '{2c:00:2d:00:16:00}', + '{2}'), + ('inetcol', 'inet', + '{=}', + '{10.2.14.231/24}', + '{1}'), + ('inetcol', 'cidr', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('cidrcol', 'inet', + '{=}', + '{10.2.14/24}', + '{2}'), + ('cidrcol', 'inet', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('cidrcol', 'cidr', + '{=}', + '{10.2.14/24}', + '{2}'), + ('cidrcol', 'cidr', + '{=}', + '{fe80::6e40:8ff:fea9:8c46}', + '{1}'), + ('bpcharcol', 'bpchar', + '{=}', + '{W}', + '{6}'), + ('datecol', 'date', + '{=}', + '{2009-12-01}', + '{1}'), + ('timecol', 'time', + '{=}', + '{02:28:57}', + '{1}'), + ('timestampcol', 'timestamp', + '{=}', + '{1964-03-24 19:26:45}', + '{1}'), + ('timestampcol', 'timestamptz', + '{=}', + '{1964-03-24 19:26:45}', + '{1}'), + ('timestamptzcol', 'timestamptz', + '{=}', + '{1972-10-19 09:00:00-07}', + '{1}'), + ('intervalcol', 'interval', + '{=}', + '{1 mons 13 days 12:24}', + '{1}'), + ('timetzcol', 'timetz', + '{=}', + '{01:35:50+02}', + '{2}'), + ('numericcol', 'numeric', + '{=}', + '{2268164.347826086956521739130434782609}', + '{1}'), + ('uuidcol', 'uuid', + '{=}', + '{52225222-5222-5222-5222-522252225222}', + '{1}'), + ('lsncol', 'pg_lsn', + '{=, IS, IS NOT}', + '{44/455222, NULL, NULL}', + '{1, 25, 100}'); + +DO $x$ +DECLARE + r record; + r2 record; + cond text; + idx_ctids tid[]; + ss_ctids tid[]; + count int; + plan_ok bool; + plan_line text; +BEGIN + FOR r IN SELECT colname, oper, typ, value[ordinality], matches[ordinality] FROM brinopers_bloom, unnest(op) WITH ORDINALITY AS oper LOOP + + -- prepare the condition + IF r.value IS NULL THEN + cond := format('%I %s %L', r.colname, r.oper, r.value); + ELSE + cond := format('%I %s %L::%s', r.colname, r.oper, r.value, r.typ); + END IF; + + -- run the query using the brin index + SET enable_seqscan = 0; + SET enable_bitmapscan = 1; + + plan_ok := false; + FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP + IF plan_line LIKE '%Bitmap Heap Scan on brintest_bloom%' THEN + plan_ok := true; + END IF; + END LOOP; + IF NOT plan_ok THEN + RAISE WARNING 'did not get bitmap indexscan plan for %', r; + END IF; + + EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) + INTO idx_ctids; + + -- run the query using a seqscan + SET enable_seqscan = 1; + SET enable_bitmapscan = 0; + + plan_ok := false; + FOR plan_line IN EXECUTE format($y$EXPLAIN SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) LOOP + IF plan_line LIKE '%Seq Scan on brintest_bloom%' THEN + plan_ok := true; + END IF; + END LOOP; + IF NOT plan_ok THEN + RAISE WARNING 'did not get seqscan plan for %', r; + END IF; + + EXECUTE format($y$SELECT array_agg(ctid) FROM brintest_bloom WHERE %s $y$, cond) + INTO ss_ctids; + + -- make sure both return the same results + count := array_length(idx_ctids, 1); + + IF NOT (count = array_length(ss_ctids, 1) AND + idx_ctids @> ss_ctids AND + idx_ctids <@ ss_ctids) THEN + -- report the results of each scan to make the differences obvious + RAISE WARNING 'something not right in %: count %', r, count; + SET enable_seqscan = 1; + SET enable_bitmapscan = 0; + FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP + RAISE NOTICE 'seqscan: %', r2; + END LOOP; + + SET enable_seqscan = 0; + SET enable_bitmapscan = 1; + FOR r2 IN EXECUTE 'SELECT ' || r.colname || ' FROM brintest_bloom WHERE ' || cond LOOP + RAISE NOTICE 'bitmapscan: %', r2; + END LOOP; + END IF; + + -- make sure we found expected number of matches + IF count != r.matches THEN RAISE WARNING 'unexpected number of results % for %', count, r; END IF; + END LOOP; +END; +$x$; + +RESET enable_seqscan; +RESET enable_bitmapscan; + +INSERT INTO brintest_bloom SELECT + repeat(stringu1, 42)::bytea, + substr(stringu1, 1, 1)::"char", + stringu1::name, 142857 * tenthous, + thousand, + twothousand, + repeat(stringu1, 42), + unique1::oid, + (four + 1.0)/(hundred+1), + odd::float8 / (tenthous + 1), + format('%s:00:%s:00:%s:00', to_hex(odd), to_hex(even), to_hex(hundred))::macaddr, + inet '10.2.3.4' + tenthous, + cidr '10.2.3/24' + tenthous, + substr(stringu1, 1, 1)::bpchar, + date '1995-08-15' + tenthous, + time '01:20:30' + thousand * interval '18.5 second', + timestamp '1942-07-23 03:05:09' + tenthous * interval '36.38 hours', + timestamptz '1972-10-10 03:00' + thousand * interval '1 hour', + justify_days(justify_hours(tenthous * interval '12 minutes')), + timetz '01:30:20' + hundred * interval '15 seconds', + tenthous::numeric(36,30) * fivethous * even / (hundred + 1), + format('%s%s-%s-%s-%s-%s%s%s', to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'), to_char(tenthous, 'FM0000'))::uuid, + format('%s/%s%s', odd, even, tenthous)::pg_lsn +FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; + +SELECT brin_desummarize_range('brinidx_bloom', 0); +VACUUM brintest_bloom; -- force a summarization cycle in brinidx + +UPDATE brintest_bloom SET int8col = int8col * int4col; +UPDATE brintest_bloom SET textcol = '' WHERE textcol IS NOT NULL; + +-- Tests for brin_summarize_new_values +SELECT brin_summarize_new_values('brintest_bloom'); -- error, not an index +SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index +SELECT brin_summarize_new_values('brinidx_bloom'); -- ok, no change expected + +-- Tests for brin_desummarize_range +SELECT brin_desummarize_range('brinidx_bloom', -1); -- error, invalid range +SELECT brin_desummarize_range('brinidx_bloom', 0); +SELECT brin_desummarize_range('brinidx_bloom', 0); +SELECT brin_desummarize_range('brinidx_bloom', 100000000); + +-- Test brin_summarize_range +CREATE TABLE brin_summarize_bloom ( + value int +) WITH (fillfactor=10, autovacuum_enabled=false); +CREATE INDEX brin_summarize_bloom_idx ON brin_summarize_bloom USING brin (value) WITH (pages_per_range=2); +-- Fill a few pages +DO $$ +DECLARE curtid tid; +BEGIN + LOOP + INSERT INTO brin_summarize_bloom VALUES (1) RETURNING ctid INTO curtid; + EXIT WHEN curtid > tid '(2, 0)'; + END LOOP; +END; +$$; + +-- summarize one range +SELECT brin_summarize_range('brin_summarize_bloom_idx', 0); +-- nothing: already summarized +SELECT brin_summarize_range('brin_summarize_bloom_idx', 1); +-- summarize one range +SELECT brin_summarize_range('brin_summarize_bloom_idx', 2); +-- nothing: page doesn't exist in table +SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967295); +-- invalid block number values +SELECT brin_summarize_range('brin_summarize_bloom_idx', -1); +SELECT brin_summarize_range('brin_summarize_bloom_idx', 4294967296); + + +-- test brin cost estimates behave sanely based on correlation of values +CREATE TABLE brin_test_bloom (a INT, b INT); +INSERT INTO brin_test_bloom SELECT x/100,x%100 FROM generate_series(1,10000) x(x); +CREATE INDEX brin_test_bloom_a_idx ON brin_test_bloom USING brin (a) WITH (pages_per_range = 2); +CREATE INDEX brin_test_bloom_b_idx ON brin_test_bloom USING brin (b) WITH (pages_per_range = 2); +VACUUM ANALYZE brin_test_bloom; + +-- Ensure brin index is used when columns are perfectly correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE a = 1; +-- Ensure brin index is not used when values are not correlated +EXPLAIN (COSTS OFF) SELECT * FROM brin_test_bloom WHERE b = 1; -- 2.21.3