From df52a663305f1f900801925e2914a4c355177427 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 6 Aug 2020 18:20:28 +0200 Subject: [PATCH 07/10] add special pg_brin_minmax_multi_summary data type --- src/backend/access/brin/brin_minmax_multi.c | 260 +++++++++++++++++--- src/include/catalog/pg_proc.dat | 15 ++ src/include/catalog/pg_type.dat | 7 + src/test/regress/expected/type_sanity.out | 7 +- 4 files changed, 251 insertions(+), 38 deletions(-) diff --git a/src/backend/access/brin/brin_minmax_multi.c b/src/backend/access/brin/brin_minmax_multi.c index a8ff28a98d..1a1ff47223 100644 --- a/src/backend/access/brin/brin_minmax_multi.c +++ b/src/backend/access/brin/brin_minmax_multi.c @@ -59,6 +59,7 @@ #include "access/htup_details.h" #include "catalog/pg_type.h" #include "catalog/pg_amop.h" +#include "utils/array.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datum.h" @@ -165,6 +166,9 @@ typedef struct SerializedRanges /* varlena header (do not touch directly!) */ int32 vl_len_; + /* type of values stored in the data array */ + Oid typid; + /* (2*nranges + nvalues) <= maxvalues */ int nranges; /* number of ranges in the array (stored) */ int nvalues; /* number of values in the data array (all) */ @@ -174,11 +178,9 @@ typedef struct SerializedRanges char data[FLEXIBLE_ARRAY_MEMBER]; } SerializedRanges; -static SerializedRanges *range_serialize(Ranges *range, - AttrNumber attno, Form_pg_attribute attr); +static SerializedRanges *range_serialize(Ranges *range, Oid typid); -static Ranges *range_deserialize(SerializedRanges *range, - AttrNumber attno, Form_pg_attribute attr); +static Ranges *range_deserialize(SerializedRanges *range); /* Cache for support and strategy procesures. */ @@ -223,11 +225,13 @@ minmax_multi_init(int maxvalues) * in the in-memory value array. 
*/ static SerializedRanges * -range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) +range_serialize(Ranges *range, Oid typid) { Size len; int nvalues; SerializedRanges *serialized; + int typlen; + bool typbyval; int i; char *ptr; @@ -242,6 +246,9 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) Assert(2*range->nranges + range->nvalues <= range->maxvalues); + typbyval = get_typbyval(typid); + typlen = get_typlen(typid); + /* header is always needed */ len = offsetof(SerializedRanges,data); @@ -251,7 +258,7 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) * (attlen * nvalues) and we're done. For variable-length by-reference * types we need to actually walk all the values and sum the lengths. */ - if (attr->attlen == -1) /* varlena */ + if (typlen == -1) /* varlena */ { int i; for (i = 0; i < nvalues; i++) @@ -259,7 +266,7 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) len += VARSIZE_ANY(range->values[i]); } } - else if (attr->attlen == -2) /* cstring */ + else if (typlen == -2) /* cstring */ { int i; for (i = 0; i < nvalues; i++) @@ -270,8 +277,8 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) } else /* fixed-length types (even by-reference) */ { - Assert(attr->attlen > 0); - len += nvalues * attr->attlen; + Assert(typlen > 0); + len += nvalues * typlen; } /* @@ -281,6 +288,7 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) serialized = (SerializedRanges *) palloc0(len); SET_VARSIZE(serialized, len); + serialized->typid = typid; serialized->nranges = range->nranges; serialized->nvalues = range->nvalues; serialized->maxvalues = range->maxvalues; @@ -293,23 +301,23 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) for (i = 0; i < nvalues; i++) { - if (attr->attbyval) /* simple by-value data types */ + if (typbyval) /* simple by-value data types */ { - memcpy(ptr, &range->values[i], 
attr->attlen); - ptr += attr->attlen; + memcpy(ptr, &range->values[i], typlen); + ptr += typlen; } - else if (attr->attlen > 0) /* fixed-length by-ref types */ + else if (typlen > 0) /* fixed-length by-ref types */ { - memcpy(ptr, DatumGetPointer(range->values[i]), attr->attlen); - ptr += attr->attlen; + memcpy(ptr, DatumGetPointer(range->values[i]), typlen); + ptr += typlen; } - else if (attr->attlen == -1) /* varlena */ + else if (typlen == -1) /* varlena */ { int tmp = VARSIZE_ANY(DatumGetPointer(range->values[i])); memcpy(ptr, DatumGetPointer(range->values[i]), tmp); ptr += tmp; } - else if (attr->attlen == -2) /* cstring */ + else if (typlen == -2) /* cstring */ { int tmp = strlen(DatumGetPointer(range->values[i])) + 1; memcpy(ptr, DatumGetPointer(range->values[i]), tmp); @@ -334,12 +342,13 @@ range_serialize(Ranges *range, AttrNumber attno, Form_pg_attribute attr) * in the in-memory value array. */ static Ranges * -range_deserialize(SerializedRanges *serialized, - AttrNumber attno, Form_pg_attribute attr) +range_deserialize(SerializedRanges *serialized) { int i, nvalues; char *ptr; + bool typbyval; + int typlen; Ranges *range; @@ -358,6 +367,9 @@ range_deserialize(SerializedRanges *serialized, range->nvalues = serialized->nvalues; range->maxvalues = serialized->maxvalues; + typbyval = get_typbyval(serialized->typid); + typlen = get_typlen(serialized->typid); + /* * And now deconstruct the values into Datum array. 
We don't need * to copy the values and will instead just point the values to the @@ -367,23 +379,23 @@ range_deserialize(SerializedRanges *serialized, for (i = 0; i < nvalues; i++) { - if (attr->attbyval) /* simple by-value data types */ + if (typbyval) /* simple by-value data types */ { - memcpy(&range->values[i], ptr, attr->attlen); - ptr += attr->attlen; + memcpy(&range->values[i], ptr, typlen); + ptr += typlen; } - else if (attr->attlen > 0) /* fixed-length by-ref types */ + else if (typlen > 0) /* fixed-length by-ref types */ { /* no copy, just set the value to the pointer */ range->values[i] = PointerGetDatum(ptr); - ptr += attr->attlen; + ptr += typlen; } - else if (attr->attlen == -1) /* varlena */ + else if (typlen == -1) /* varlena */ { range->values[i] = PointerGetDatum(ptr); ptr += VARSIZE_ANY(DatumGetPointer(range->values[i])); } - else if (attr->attlen == -2) /* cstring */ + else if (typlen == -2) /* cstring */ { range->values[i] = PointerGetDatum(ptr); ptr += strlen(DatumGetPointer(range->values[i])) + 1; @@ -1287,7 +1299,7 @@ brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS) result->oi_regular_nulls = true; result->oi_opaque = (MinmaxMultiOpaque *) MAXALIGN((char *) result + SizeofBrinOpcInfo(1)); - result->oi_typcache[0] = lookup_type_cache(BYTEAOID, 0); + result->oi_typcache[0] = lookup_type_cache(BRINMINMAXMULTISUMMARYOID, 0); PG_RETURN_POINTER(result); } @@ -1732,7 +1744,7 @@ brin_minmax_multi_add_value(PG_FUNCTION_ARGS) else { serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]); - ranges = range_deserialize(serialized, attno, attr); + ranges = range_deserialize(serialized); } /* @@ -1743,7 +1755,7 @@ brin_minmax_multi_add_value(PG_FUNCTION_ARGS) if (modified) { - SerializedRanges *s = range_serialize(ranges, attno, attr); + SerializedRanges *s = range_serialize(ranges, attr->atttypid); column->bv_values[0] = PointerGetDatum(s); /* @@ -1782,13 +1794,11 @@ brin_minmax_multi_consistent(PG_FUNCTION_ARGS) int keyno; int rangeno; int 
i; - Form_pg_attribute attr; attno = column->bv_attno; - attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1); serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]); - ranges = range_deserialize(serialized, attno, attr); + ranges = range_deserialize(serialized); /* inspect the ranges, and for each one evaluate the scan keys */ for (rangeno = 0; rangeno < ranges->nranges; rangeno++) @@ -1975,8 +1985,8 @@ brin_minmax_multi_union(PG_FUNCTION_ARGS) serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]); serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]); - ranges_a = range_deserialize(serialized_a, attno, attr); - ranges_b = range_deserialize(serialized_b, attno, attr); + ranges_a = range_deserialize(serialized_a); + ranges_b = range_deserialize(serialized_b); /* make sure neither of the ranges is NULL */ Assert(ranges_a && ranges_b); @@ -2046,7 +2056,7 @@ brin_minmax_multi_union(PG_FUNCTION_ARGS) /* cleanup and update the serialized value */ pfree(serialized_a); - col_a->bv_values[0] = PointerGetDatum(range_serialize(ranges_a, attno, attr)); + col_a->bv_values[0] = PointerGetDatum(range_serialize(ranges_a, attr->atttypid)); PG_RETURN_VOID(); } @@ -2170,3 +2180,183 @@ brin_minmax_multi_options(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * brin_minmax_multi_summary_in + * - input routine for type brin_minmax_multi_summary. + * + * brin_minmax_multi_summary is only used internally to represent summaries + * in BRIN minmax-multi indexes, so it has no operations of its own, and we + * disallow input too. + */ +Datum +brin_minmax_multi_summary_in(PG_FUNCTION_ARGS) +{ + /* + * brin_minmax_multi_summary stores the data in binary form and parsing + * text input is not needed, so disallow this. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + + +/* + * brin_minmax_multi_summary_out + * - output routine for type brin_minmax_multi_summary. + * + * BRIN minmax-multi summaries are serialized into a bytea value, but we + * want to output something nicer that humans can understand. + */ +Datum +brin_minmax_multi_summary_out(PG_FUNCTION_ARGS) +{ + int i; + int idx; + SerializedRanges *ranges; + Ranges *ranges_deserialized; + StringInfoData str; + bool isvarlena; + Oid outfunc; + FmgrInfo fmgrinfo; + ArrayBuildState *astate_values = NULL; + + initStringInfo(&str); + appendStringInfoChar(&str, '{'); + + /* + * XXX not sure the detoasting is necessary (probably not, this + * can only be in an index). + */ + ranges = (SerializedRanges *) PG_DETOAST_DATUM(PG_GETARG_BYTEA_PP(0)); + + /* lookup output func for the type */ + getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena); + fmgr_info(outfunc, &fmgrinfo); + + /* deserialize the range info into easy-to-process pieces */ + ranges_deserialized = range_deserialize(ranges); + + appendStringInfo(&str, "nranges: %u nvalues: %u maxvalues: %u", + ranges_deserialized->nranges, + ranges_deserialized->nvalues, + ranges_deserialized->maxvalues); + + /* serialize ranges */ + idx = 0; + for (i = 0; i < ranges_deserialized->nranges; i++) + { + Datum a, b; + text *c; + StringInfoData str; + + initStringInfo(&str); + + a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); + b = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); + + appendStringInfo(&str, "%s ... 
%s", + DatumGetPointer(a), + DatumGetPointer(b)); + + c = cstring_to_text(str.data); + + astate_values = accumArrayResult(astate_values, + PointerGetDatum(c), + false, + TEXTOID, + CurrentMemoryContext); + } + + if (ranges_deserialized->nranges > 0) + { + Oid typoutput; + bool typIsVarlena; + Datum val; + char *extval; + + getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena); + + val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext)); + + extval = OidOutputFunctionCall(typoutput, val); + + appendStringInfo(&str, " ranges: %s", extval); + } + + /* serialize individual values */ + astate_values = NULL; + + for (i = 0; i < ranges_deserialized->nvalues; i++) + { + Datum a; + text *b; + StringInfoData str; + + initStringInfo(&str); + + a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]); + + appendStringInfo(&str, "%s", DatumGetPointer(a)); + + b = cstring_to_text(str.data); + + astate_values = accumArrayResult(astate_values, + PointerGetDatum(b), + false, + TEXTOID, + CurrentMemoryContext); + } + + if (ranges_deserialized->nvalues > 0) + { + Oid typoutput; + bool typIsVarlena; + Datum val; + char *extval; + + getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena); + + val = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext)); + + extval = OidOutputFunctionCall(typoutput, val); + + appendStringInfo(&str, " values: %s", extval); + } + + + appendStringInfoChar(&str, '}'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * brin_minmax_multi_summary_recv + * - binary input routine for type brin_minmax_multi_summary. + */ +Datum +brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * brin_minmax_multi_summary_send + * - binary output routine for type brin_minmax_multi_summary. 
+ * + * BRIN minmax-multi summaries are serialized in a bytea value (although + * the type is named differently), so let's just send that. + */ +Datum +brin_minmax_multi_summary_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 1936e06537..77735c3abe 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11072,3 +11072,18 @@ { oid => '9038', descr => 'I/O', proname => 'brin_bloom_summary_send', provolatile => 's', prorettype => 'bytea', proargtypes => 'pg_brin_bloom_summary', prosrc => 'brin_bloom_summary_send' }, + + +{ oid => '9040', descr => 'I/O', + proname => 'brin_minmax_multi_summary_in', prorettype => 'pg_brin_minmax_multi_summary', + proargtypes => 'cstring', prosrc => 'brin_minmax_multi_summary_in' }, +{ oid => '9041', descr => 'I/O', + proname => 'brin_minmax_multi_summary_out', prorettype => 'cstring', + proargtypes => 'pg_brin_minmax_multi_summary', prosrc => 'brin_minmax_multi_summary_out' }, +{ oid => '9042', descr => 'I/O', + proname => 'brin_minmax_multi_summary_recv', provolatile => 's', + prorettype => 'pg_brin_minmax_multi_summary', proargtypes => 'internal', + prosrc => 'brin_minmax_multi_summary_recv' }, +{ oid => '9043', descr => 'I/O', + proname => 'brin_minmax_multi_summary_send', provolatile => 's', prorettype => 'bytea', + proargtypes => 'pg_brin_minmax_multi_summary', prosrc => 'brin_minmax_multi_summary_send' }, diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index a41c2e5418..c189b35a3d 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -638,3 +638,10 @@ typinput => 'brin_bloom_summary_in', typoutput => 'brin_bloom_summary_out', typreceive => 'brin_bloom_summary_recv', typsend => 'brin_bloom_summary_send', typalign => 'i', typstorage => 'x', typcollation => 'default' }, + +{ oid => '9039', oid_symbol => 'BRINMINMAXMULTISUMMARYOID', + descr => 
'BRIN minmax-multi summary', + typname => 'pg_brin_minmax_multi_summary', typlen => '-1', typbyval => 'f', typcategory => 'S', + typinput => 'brin_minmax_multi_summary_in', typoutput => 'brin_minmax_multi_summary_out', + typreceive => 'brin_minmax_multi_summary_recv', typsend => 'brin_minmax_multi_summary_send', + typalign => 'i', typstorage => 'x', typcollation => 'default' }, diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 97bf9797de..55a30a6b47 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -67,14 +67,15 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); - oid | typname -------+----------------------- + oid | typname +------+------------------------------ 194 | pg_node_tree 3361 | pg_ndistinct 3402 | pg_dependencies 5017 | pg_mcv_list 9034 | pg_brin_bloom_summary -(5 rows) + 9039 | pg_brin_minmax_multi_summary +(6 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, -- 2.25.4