From 73f7dbf1921eea7106c93654dd7bf1fdd0888a07 Mon Sep 17 00:00:00 2001 From: Brandur Date: Tue, 16 Aug 2016 17:07:07 -0700 Subject: [PATCH] Implement SortSupport for macaddr data type Introduces a scheme to produce abbreviated keys for the macaddr type which involves packing as many of a MAC address' six bytes as possible into a Datum (and adding two zeroed bytes of padding on a 64-bit system). Sorting routines can then take advantage of the abbreviated keys by performing fast unsigned integer comparisons, resulting in a significant performance increase when operating on > 100k rows or more. This implementation is modeled closely on the addition of SortSupport for the UUID data type, which employs a very similar strategy. --- src/backend/utils/adt/mac.c | 210 ++++++++++++++++++++++++++++++++++++++++ src/include/catalog/pg_amproc.h | 1 + src/include/catalog/pg_proc.h | 2 + src/include/utils/builtins.h | 1 + 4 files changed, 214 insertions(+) diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c index 509315a..471c3f0 100644 --- a/src/backend/utils/adt/mac.c +++ b/src/backend/utils/adt/mac.c @@ -7,9 +7,13 @@ #include "postgres.h" #include "access/hash.h" +#include "lib/hyperloglog.h" #include "libpq/pqformat.h" +#include "port/pg_bswap.h" #include "utils/builtins.h" +#include "utils/guc.h" #include "utils/inet.h" +#include "utils/sortsupport.h" /* @@ -22,6 +26,21 @@ #define lobits(addr) \ ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) +/* sortsupport for macaddr */ +typedef struct +{ + int64 input_count; /* number of non-null values seen */ + bool estimating; /* true if estimating cardinality */ + + hyperLogLogState abbr_card; /* cardinality estimator */ +} macaddr_sortsupport_state; + +static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); +static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); +static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); +static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); +static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); + /* * MAC address reader. Accepts several common notations. */ @@ -318,3 +337,194 @@ macaddr_trunc(PG_FUNCTION_ARGS) PG_RETURN_MACADDR_P(result); } + +/* + * SortSupport strategy function. Populates a SortSupport struct with the + * information necessary to use comparison by abbreviated keys. + */ +Datum +macaddr_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + + ssup->comparator = macaddr_fast_cmp; + ssup->ssup_extra = NULL; + + if (ssup->abbreviate) + { + macaddr_sortsupport_state *uss; + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + uss = palloc(sizeof(macaddr_sortsupport_state)); + uss->input_count = 0; + uss->estimating = true; + initHyperLogLog(&uss->abbr_card, 10); + + ssup->ssup_extra = uss; + + ssup->comparator = macaddr_cmp_abbrev; + ssup->abbrev_converter = macaddr_abbrev_convert; + ssup->abbrev_abort = macaddr_abbrev_abort; + ssup->abbrev_full_comparator = macaddr_fast_cmp; + + MemoryContextSwitchTo(oldcontext); + } + + PG_RETURN_VOID(); +} + +/* + * SortSupport "traditional" comparison function. Pulls two MAC addresses from + * the heap and runs a standard comparison on them. + */ +static int +macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) +{ + macaddr *arg1 = DatumGetMacaddrP(x); + macaddr *arg2 = DatumGetMacaddrP(y); + + return macaddr_cmp_internal(arg1, arg2); +} + +/* + * SortSupport abbreviated key comparison function. Compares two MAC addresses + * quickly by treating them like integers, and without having to go to the + * heap. + */ +static int +macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + if (x > y) + return 1; + else if (x == y) + return 0; + else + return -1; +} + +/* + * Callback for estimating effectiveness of abbreviated key optimization. + * + * We pay no attention to the cardinality of the non-abbreviated data, because + * there is no equality fast-path within the standard macaddr comparator. + */ +static bool +macaddr_abbrev_abort(int memtupcount, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + double abbr_card; + + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) + return false; + + abbr_card = estimateHyperLogLog(&uss->abbr_card); + + /* + * If we have >100k distinct values, then even if we were sorting many + * billion rows we'd likely still break even, and the penalty of undoing + * that many rows of abbrevs would probably not be worth it. At this point + * we stop counting because we know that we're now fully committed. + */ + if (abbr_card > 100000.0) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: estimation ends at cardinality %f" + " after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count, memtupcount); +#endif + uss->estimating = false; + return false; + } + + /* + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row + * fudge factor allows us to abort earlier on genuinely pathological data + * where we've had exactly one abbreviated value in the first 2k + * (non-null) rows. + */ + if (abbr_card < uss->input_count / 2000.0 + 0.5) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: aborting abbreviation at cardinality %f" + " below threshold %f after " INT64_FORMAT " values (%d rows)", + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, + memtupcount); +#endif + return true; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "macaddr_abbrev: cardinality %f after " INT64_FORMAT + " values (%d rows)", abbr_card, uss->input_count, memtupcount); +#endif + + return false; +} + +/* + * SortSupport converstion routine. Converts original macaddr representation + * to abbreviated key representation. + * + * Packs as many bytes from a 6-byte MAC addresses into a Datum and treats it + * as an unsigned integer for purposes of comparison. On a 64-bit machine, + * there will be two zeroed bytes of padding. + */ +static Datum +macaddr_abbrev_convert(Datum original, SortSupport ssup) +{ + macaddr_sortsupport_state *uss = ssup->ssup_extra; + macaddr *authoritative = DatumGetMacaddrP(original); + Datum res; + + /* + * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of + * the MAC address in. There will be two bytes of zero padding on the least + * significant end. + */ +#if SIZEOF_DATUM == 8 + memset(&res, 0, SIZEOF_DATUM); + memcpy(&res, authoritative, sizeof(macaddr)); +#else /* SIZEOF_DATUM != 8 */ + memcpy(&res, authoritative, SIZEOF_DATUM); +#endif + uss->input_count += 1; + + /* + * Cardinality estimation. The estimate uses uint32, so on a 64-bit + * architecture, XOR the two 32-bit halves together to produce slightly + * more entropy. The two zeroed bytes won't have any practical impact on + * this operation. + */ + if (uss->estimating) + { + uint32 tmp; + +#if SIZEOF_DATUM == 8 + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); +#else /* SIZEOF_DATUM != 8 */ + tmp = (uint32) res; +#endif + + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); + } + + /* + * Byteswap on little-endian machines. + * + * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way + * comparator) works correctly on all platforms. Without this, the + * comparator would have to call memcmp() with a pair of pointers to the + * first byte of each abbreviated key, which is slower. + */ + res = DatumBigEndianToNative(res); + + return res; +} diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 00320b4..b66c074 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -117,6 +117,7 @@ DATA(insert ( 1976 20 23 1 2189 )); DATA(insert ( 1976 20 21 1 2193 )); DATA(insert ( 1982 1186 1186 1 1315 )); DATA(insert ( 1984 829 829 1 836 )); +DATA(insert ( 1984 829 829 2 3401 )); DATA(insert ( 1986 19 19 1 359 )); DATA(insert ( 1986 19 19 2 3135 )); DATA(insert ( 1988 1700 1700 1 1769 )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index af19c1a..2804cd6 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2106,6 +2106,8 @@ DATA(insert OID = 834 ( macaddr_ge PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 DATA(insert OID = 835 ( macaddr_ne PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_ne _null_ _null_ _null_ )); DATA(insert OID = 836 ( macaddr_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 23 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_cmp _null_ _null_ _null_ )); DESCR("less-equal-greater"); +DATA(insert OID = 3401 ( macaddr_sortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ macaddr_sortsupport _null_ _null_ _null_ )); +DESCR("sort support"); DATA(insert OID = 3144 ( macaddr_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 829 "829" _null_ _null_ _null_ _null_ _null_ macaddr_not _null_ _null_ _null_ )); DATA(insert OID = 3145 ( macaddr_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_and _null_ _null_ _null_ )); DATA(insert OID = 3146 ( macaddr_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 829 "829 829" _null_ _null_ _null_ _null_ _null_ macaddr_or _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index a91be98..fc60b63 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -1018,6 +1018,7 @@ extern Datum macaddr_not(PG_FUNCTION_ARGS); extern Datum macaddr_and(PG_FUNCTION_ARGS); extern Datum macaddr_or(PG_FUNCTION_ARGS); extern Datum macaddr_trunc(PG_FUNCTION_ARGS); +extern Datum macaddr_sortsupport(PG_FUNCTION_ARGS); extern Datum hashmacaddr(PG_FUNCTION_ARGS); /* numeric.c */ -- 2.7.2