diff --git a/contrib/bench_radix_tree/bench_radix_tree--1.0.sql b/contrib/bench_radix_tree/bench_radix_tree--1.0.sql index 67ba568531..2fd689aa91 100644 --- a/contrib/bench_radix_tree/bench_radix_tree--1.0.sql +++ b/contrib/bench_radix_tree/bench_radix_tree--1.0.sql @@ -63,3 +63,14 @@ OUT rt_search_ms int8 returns record as 'MODULE_PATHNAME' LANGUAGE C STRICT VOLATILE PARALLEL UNSAFE; + +create function bench_node128_load( +fanout int4, +OUT fanout int4, +OUT nkeys int8, +OUT rt_mem_allocated int8, +OUT rt_sparseload_ms int8 +) +returns record +as 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE PARALLEL UNSAFE; diff --git a/contrib/bench_radix_tree/bench_radix_tree.c b/contrib/bench_radix_tree/bench_radix_tree.c index e69be48448..b035b3a747 100644 --- a/contrib/bench_radix_tree/bench_radix_tree.c +++ b/contrib/bench_radix_tree/bench_radix_tree.c @@ -31,6 +31,7 @@ PG_FUNCTION_INFO_V1(bench_shuffle_search); PG_FUNCTION_INFO_V1(bench_load_random_int); PG_FUNCTION_INFO_V1(bench_fixed_height_search); PG_FUNCTION_INFO_V1(bench_search_random_nodes); +PG_FUNCTION_INFO_V1(bench_node128_load); static uint64 tid_to_key_off(ItemPointer tid, uint32 *off) @@ -552,3 +553,85 @@ finish_search: rt_free(rt); PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } + +Datum +bench_node128_load(PG_FUNCTION_ARGS) +{ + int fanout = PG_GETARG_INT32(0); + radix_tree *rt; + TupleDesc tupdesc; + TimestampTz start_time, + end_time; + long secs; + int usecs; + int64 rt_sparseload_ms; + Datum values[5]; + bool nulls[5]; + + uint64 r, + h; + int key_id; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + rt = rt_create(CurrentMemoryContext); + + key_id = 0; + + for (r = 1; r <= fanout; r++) + { + for (h = 1; h <= fanout; h++) + { + uint64 key; + + key = (r << 8) | (h); + + key_id++; + rt_set(rt, key, key_id); + } + } + + rt_stats(rt); + + /* measure sparse deletion and re-loading */ + start_time = GetCurrentTimestamp(); + + for (int t = 0; t<10000; t++) + { + /* delete one key in each leaf */ + for (r = 1; r <= fanout; r++) + { + uint64 key; + + key = (r << 8) | (fanout); + + rt_delete(rt, key); + } + + /* add them all back */ + key_id = 0; + for (r = 1; r <= fanout; r++) + { + uint64 key; + + key = (r << 8) | (fanout); + + key_id++; + rt_set(rt, key, key_id); + } + } + end_time = GetCurrentTimestamp(); + TimestampDifference(start_time, end_time, &secs, &usecs); + rt_sparseload_ms = secs * 1000 + usecs / 1000; + + MemSet(nulls, false, sizeof(nulls)); + values[0] = Int32GetDatum(fanout); + values[1] = Int64GetDatum(rt_num_entries(rt)); + values[2] = Int64GetDatum(rt_memory_usage(rt)); + values[3] = Int64GetDatum(rt_sparseload_ms); + + rt_free(rt); + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} diff --git a/src/backend/lib/radixtree.c b/src/backend/lib/radixtree.c index f10abd8add..9cfed1624f 100644 --- a/src/backend/lib/radixtree.c +++ b/src/backend/lib/radixtree.c @@ -262,6 +262,9 @@ typedef struct rt_node_inner_128 { rt_node_base_128 base; + /* isset is a bitmap to track which slot is in use */ + bitmapword isset[WORDNUM(128)]; + /* number of children depends on size class */ rt_node *children[FLEXIBLE_ARRAY_MEMBER]; } rt_node_inner_128; @@ -271,7 +274,7 @@ typedef struct rt_node_leaf_128 rt_node_base_128 base; /* isset is a bitmap to track which slot is in use */ - uint8 isset[RT_NODE_NSLOTS_BITS(128)]; + bitmapword isset[WORDNUM(128)]; /* number of values depends on size class */ uint64 values[FLEXIBLE_ARRAY_MEMBER]; @@ -679,14 +682,14 @@ static inline bool node_inner_128_is_slot_used(rt_node_inner_128 *node, uint8 slot) { Assert(!NODE_IS_LEAF(node)); - return (node->children[slot] != NULL); + return (node->isset[WORDNUM(slot)] & ((bitmapword) 1 << BITNUM(slot))) != 0; } static inline bool node_leaf_128_is_slot_used(rt_node_leaf_128 *node, uint8 slot) { Assert(NODE_IS_LEAF(node)); - return ((node->isset[RT_NODE_BITMAP_BYTE(slot)] & RT_NODE_BITMAP_BIT(slot)) != 0); + return (node->isset[WORDNUM(slot)] & ((bitmapword) 1 << BITNUM(slot))) != 0; } static inline rt_node * @@ -707,7 +710,10 @@ node_leaf_128_get_value(rt_node_leaf_128 *node, uint8 chunk) static void node_inner_128_delete(rt_node_inner_128 *node, uint8 chunk) { + int slotpos = node->base.slot_idxs[chunk]; + Assert(!NODE_IS_LEAF(node)); + node->isset[WORDNUM(slotpos)] &= ~((bitmapword) 1 << BITNUM(slotpos)); node->base.slot_idxs[chunk] = RT_NODE_128_INVALID_IDX; } @@ -717,41 +723,32 @@ node_leaf_128_delete(rt_node_leaf_128 *node, uint8 chunk) int slotpos = node->base.slot_idxs[chunk]; Assert(NODE_IS_LEAF(node)); - node->isset[RT_NODE_BITMAP_BYTE(slotpos)] &= ~(RT_NODE_BITMAP_BIT(slotpos)); + node->isset[WORDNUM(slotpos)] &= ~((bitmapword) 1 << BITNUM(slotpos)); node->base.slot_idxs[chunk] = RT_NODE_128_INVALID_IDX; } /* Return an unused slot in node-128 */ static int -node_inner_128_find_unused_slot(rt_node_inner_128 *node, uint8 chunk) -{ - int slotpos = 0; - - Assert(!NODE_IS_LEAF(node)); - while (node_inner_128_is_slot_used(node, slotpos)) - slotpos++; - - return slotpos; -} - -static int -node_leaf_128_find_unused_slot(rt_node_leaf_128 *node, uint8 chunk) +node128_find_unused_slot(bitmapword *isset) { int slotpos; + int idx; + bitmapword inverse; - Assert(NODE_IS_LEAF(node)); - - /* We iterate over the isset bitmap per byte then check each bit */ - for (slotpos = 0; slotpos < RT_NODE_NSLOTS_BITS(128); slotpos++) + /* get the first word with at least one bit not set */ + for (idx = 0; idx < WORDNUM(128); idx++) { - if (node->isset[slotpos] < 0xFF) + if (isset[idx] < ~((bitmapword) 0)) break; } - Assert(slotpos < RT_NODE_NSLOTS_BITS(128)); - slotpos *= BITS_PER_BYTE; - while (node_leaf_128_is_slot_used(node, slotpos)) - slotpos++; + /* To get the first unset bit in X, get the first set bit in ~X */ + inverse = ~(isset[idx]); + slotpos = idx * BITS_PER_BITMAPWORD; + slotpos += bmw_rightmost_one_pos(inverse); + + /* mark the slot used */ + isset[idx] |= RIGHTMOST_ONE(inverse); return slotpos; } @@ -763,8 +760,7 @@ node_inner_128_insert(rt_node_inner_128 *node, uint8 chunk, rt_node *child) Assert(!NODE_IS_LEAF(node)); - /* find unused slot */ - slotpos = node_inner_128_find_unused_slot(node, chunk); + slotpos = node128_find_unused_slot(node->isset); node->base.slot_idxs[chunk] = slotpos; node->children[slotpos] = child; @@ -778,11 +774,9 @@ node_leaf_128_insert(rt_node_leaf_128 *node, uint8 chunk, uint64 value) Assert(NODE_IS_LEAF(node)); - /* find unused slot */ - slotpos = node_leaf_128_find_unused_slot(node, chunk); + slotpos = node128_find_unused_slot(node->isset); node->base.slot_idxs[chunk] = slotpos; - node->isset[RT_NODE_BITMAP_BYTE(slotpos)] |= RT_NODE_BITMAP_BIT(slotpos); node->values[slotpos] = value; } @@ -2508,9 +2502,9 @@ rt_dump_node(rt_node *node, int level, bool recurse) rt_node_leaf_128 *n = (rt_node_leaf_128 *) node; fprintf(stderr, ", isset-bitmap:"); - for (int i = 0; i < 16; i++) + for (int i = 0; i < WORDNUM(128); i++) { - fprintf(stderr, "%X ", (uint8) n->isset[i]); + fprintf(stderr, "%lX ", n->isset[i]); } fprintf(stderr, "\n"); } diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index b7b274aeff..3fe0fd88ce 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -23,49 +23,11 @@ #include "common/hashfn.h" #include "nodes/bitmapset.h" #include "nodes/pg_list.h" -#include "port/pg_bitutils.h" -#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) -#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) - #define BITMAPSET_SIZE(nwords) \ (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword)) -/*---------- - * This is a well-known cute trick for isolating the rightmost one-bit - * in a word. It assumes two's complement arithmetic. Consider any - * nonzero value, and focus attention on the rightmost one. The value is - * then something like - * xxxxxx10000 - * where x's are unspecified bits. The two's complement negative is formed - * by inverting all the bits and adding one. Inversion gives - * yyyyyy01111 - * where each y is the inverse of the corresponding x. Incrementing gives - * yyyyyy10000 - * and then ANDing with the original value gives - * 00000010000 - * This works for all cases except original value = zero, where of course - * we get zero. - *---------- - */ -#define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x))) - -#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) - -/* Select appropriate bit-twiddling functions for bitmap word size */ -#if BITS_PER_BITMAPWORD == 32 -#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos32(w) -#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos32(w) -#define bmw_popcount(w) pg_popcount32(w) -#elif BITS_PER_BITMAPWORD == 64 -#define bmw_leftmost_one_pos(w) pg_leftmost_one_pos64(w) -#define bmw_rightmost_one_pos(w) pg_rightmost_one_pos64(w) -#define bmw_popcount(w) pg_popcount64(w) -#else -#error "invalid BITS_PER_BITMAPWORD" -#endif - /* * bms_copy - make a palloc'd copy of a bitmapset diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 2792281658..06fa21ccaa 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -21,33 +21,13 @@ #define BITMAPSET_H #include "nodes/nodes.h" +#include "port/pg_bitutils.h" /* * Forward decl to save including pg_list.h */ struct List; -/* - * Data representation - * - * Larger bitmap word sizes generally give better performance, so long as - * they're not wider than the processor can handle efficiently. We use - * 64-bit words if pointers are that large, else 32-bit words. - */ -#if SIZEOF_VOID_P >= 8 - -#define BITS_PER_BITMAPWORD 64 -typedef uint64 bitmapword; /* must be an unsigned type */ -typedef int64 signedbitmapword; /* must be the matching signed type */ - -#else - -#define BITS_PER_BITMAPWORD 32 -typedef uint32 bitmapword; /* must be an unsigned type */ -typedef int32 signedbitmapword; /* must be the matching signed type */ - -#endif - typedef struct Bitmapset { pg_node_attr(custom_copy_equal, special_read_write) diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 814e0b2dba..ad5aa2c5cf 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -17,6 +17,51 @@ extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256]; extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256]; extern PGDLLIMPORT const uint8 pg_number_of_ones[256]; +/* + * Platform-specific types + * + * Larger bitmap word sizes generally give better performance, so long as + * they're not wider than the processor can handle efficiently. We use + * 64-bit words if pointers are that large, else 32-bit words. + */ +#if SIZEOF_VOID_P >= 8 + +#define BITS_PER_BITMAPWORD 64 +typedef uint64 bitmapword; /* must be an unsigned type */ +typedef int64 signedbitmapword; /* must be the matching signed type */ + +#else + +#define BITS_PER_BITMAPWORD 32 +typedef uint32 bitmapword; /* must be an unsigned type */ +typedef int32 signedbitmapword; /* must be the matching signed type */ + +#endif + +#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) +#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) + +/*---------- + * This is a well-known cute trick for isolating the rightmost one-bit + * in a word. It assumes two's complement arithmetic. Consider any + * nonzero value, and focus attention on the rightmost one. The value is + * then something like + * xxxxxx10000 + * where x's are unspecified bits. The two's complement negative is formed + * by inverting all the bits and adding one. Inversion gives + * yyyyyy01111 + * where each y is the inverse of the corresponding x. Incrementing gives + * yyyyyy10000 + * and then ANDing with the original value gives + * 00000010000 + * This works for all cases except original value = zero, where of course + * we get zero. + *---------- + */ +#define RIGHTMOST_ONE(x) ((signedbitmapword) (x) & -((signedbitmapword) (x))) + +#define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) + /* * pg_leftmost_one_pos32 * Returns the position of the most significant set bit in "word", @@ -291,4 +336,17 @@ pg_rotate_left32(uint32 word, int n) #define pg_prevpower2_size_t pg_prevpower2_64 #endif +/* variants of some functions for bitmap word size */ +#if BITS_PER_BITMAPWORD == 32 +#define bmw_leftmost_one_pos pg_leftmost_one_pos32 +#define bmw_rightmost_one_pos pg_rightmost_one_pos32 +#define bmw_popcount pg_popcount32 +#elif BITS_PER_BITMAPWORD == 64 +#define bmw_leftmost_one_pos pg_leftmost_one_pos64 +#define bmw_rightmost_one_pos pg_rightmost_one_pos64 +#define bmw_popcount pg_popcount64 +#else +#error "invalid BITS_PER_BITMAPWORD" +#endif + #endif /* PG_BITUTILS_H */