diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h index bde6916184..ffb0b58826 100644 --- a/src/include/lib/radixtree.h +++ b/src/include/lib/radixtree.h @@ -287,19 +287,6 @@ RT_SCOPE void RT_STATS(RT_RADIX_TREE * tree); /* Tree level the radix tree uses */ #define RT_MAX_LEVEL ((sizeof(uint64) * BITS_PER_BYTE) / RT_SPAN) -/* - * Number of bits necessary for isset array in node48. - * Since bitmapword can be 64 bits, the only values that make sense - * here are 64 and 128. - * WIP: The paper uses at most 64 for this node kind. "isset" happens to fit - * inside a single bitmapword on most platforms, so it's a good starting - * point. We can make it higher if we need to. - */ -#define RT_SLOT_IDX_LIMIT (RT_NODE_MAX_SLOTS / 4) - -/* Invalid index used in node48 */ -#define RT_INVALID_SLOT_IDX 0xFF - /* Get a chunk from the key */ #define RT_GET_KEY_CHUNK(key, shift) ((uint8) (((key) >> (shift)) & RT_CHUNK_MASK)) @@ -426,20 +413,29 @@ typedef union RT_NODE_PTR } RT_NODE_PTR; /* - * fanout values for each size class. - * - * RT_FANOUT_4_HI and RT_FANOUT_16_HI are declared here as they are + * Symbols for maximum possible fanout are declared first as they are * required to declare each node kind. The declarations of other fanout * values are followed as they need the struct sizes of each node kind. - * - * TODO: consider 5 with subclass 1 or 2. */ /* max possible key chunks without struct padding */ -#define RT_FANOUT_4_HI (8 - sizeof(RT_NODE)) +#define RT_FANOUT_4_MAX (8 - sizeof(RT_NODE)) /* equal to two 128-bit SIMD registers, regardless of availability */ -#define RT_FANOUT_16_HI 32 +#define RT_FANOUT_16_MAX 32 + +/* + * This also determines the number of bits necessary for the isset array, + * so we need to be mindful of the size of bitmapword. + * Since bitmapword can be 64 bits, the only values that make sense + * here are 64 and 128. + * WIP: The paper uses at most 64 for this node kind. "isset" happens to fit + * inside a single bitmapword on most platforms, so it's a good starting + * point. We can make it higher if we need to. + */ +#define RT_FANOUT_48_MAX (RT_NODE_MAX_SLOTS / 4) + +#define RT_FANOUT_256 RT_NODE_MAX_SLOTS /* * Node structs, one for each "kind" @@ -448,7 +444,7 @@ typedef struct RT_NODE_4 { RT_NODE base; - uint8 chunks[RT_FANOUT_4_HI]; + uint8 chunks[RT_FANOUT_4_MAX]; /* number of children depends on size class */ RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]; @@ -458,7 +454,7 @@ typedef struct RT_NODE_16 { RT_NODE base; - uint8 chunks[RT_FANOUT_16_HI]; + uint8 chunks[RT_FANOUT_16_MAX]; /* number of children depends on size class */ RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]; @@ -476,8 +472,11 @@ typedef struct RT_NODE_48 /* The index of slots for each fanout */ uint8 slot_idxs[RT_NODE_MAX_SLOTS]; +/* Invalid index */ +#define RT_INVALID_SLOT_IDX 0xFF + /* bitmap to track which slots are in use */ - bitmapword isset[RT_BM_IDX(RT_SLOT_IDX_LIMIT)]; + bitmapword isset[RT_BM_IDX(RT_FANOUT_48_MAX)]; /* number of children depends on size class */ RT_PTR_ALLOC children[FLEXIBLE_ARRAY_MEMBER]; @@ -493,28 +492,29 @@ typedef struct RT_NODE_256 { RT_NODE base; - /* - * Zero is a valid value for embedded values, so we use a bitmap to track - * which slots are in use. - */ - bitmapword isset[RT_BM_IDX(RT_NODE_MAX_SLOTS)]; + /* bitmap to track which slots are in use */ + bitmapword isset[RT_BM_IDX(RT_FANOUT_256)]; /* Slots for 256 children */ - RT_PTR_ALLOC children[RT_NODE_MAX_SLOTS]; + RT_PTR_ALLOC children[RT_FANOUT_256]; } RT_NODE_256; -#define RT_FANOUT_4 4 -StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_HI, "watch struct padding"); - #if defined(RT_SHMEM) -/* make sure the node16 subclass and node48 fit neatly into a DSA size class */ +/* + * Make sure the all nodes (except for node256) fit neatly into a DSA size class. + * We assume the RT_FANOUT_4 is in the range where DSA size classes + * increment by 8 (as of PG17 up to 64 bytes), so we just hard + * code that one. + */ #if SIZEOF_DSA_POINTER < 8 #define RT_FANOUT_16_LO ((96 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC)) -#define RT_FANOUT_48 ((512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC)) +#define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC)) +#define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (512 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC)) #else #define RT_FANOUT_16_LO ((160 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC)) -#define RT_FANOUT_48 ((768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC)) +#define RT_FANOUT_16_HI Min(RT_FANOUT_16_MAX, (320 - offsetof(RT_NODE_16, children)) / sizeof(RT_PTR_ALLOC)) +#define RT_FANOUT_48 Min(RT_FANOUT_48_MAX, (768 - offsetof(RT_NODE_48, children)) / sizeof(RT_PTR_ALLOC)) #endif /* SIZEOF_DSA_POINTER < 8 */ #else /* ! RT_SHMEM */ @@ -522,14 +522,17 @@ StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_HI, "watch struct padding"); /* doesn't really matter, but may as well use the namesake */ #define RT_FANOUT_16_LO 16 /* use maximum possible */ -#define RT_FANOUT_48 RT_SLOT_IDX_LIMIT +#define RT_FANOUT_16_HI RT_FANOUT_16_MAX +#define RT_FANOUT_48 RT_FANOUT_48_MAX #endif /* RT_SHMEM */ -#define RT_FANOUT_256 256 +/* TODO: consider 5 with subclass 1 or 2. */ +#define RT_FANOUT_4 4 +StaticAssertDecl(RT_FANOUT_4 <= RT_FANOUT_4_MAX, "watch struct padding"); StaticAssertDecl(RT_FANOUT_16_LO < RT_FANOUT_16_HI, "LO subclass bigger than HI"); -StaticAssertDecl(RT_FANOUT_48 <= RT_SLOT_IDX_LIMIT, "more slots than isset bits"); +StaticAssertDecl(RT_FANOUT_48 <= RT_FANOUT_48_MAX, "more slots than isset bits"); /* * Node size classes @@ -1332,7 +1335,7 @@ RT_ADD_CHILD_48(RT_RADIX_TREE * tree, RT_PTR_ALLOC * ref, RT_NODE_PTR node, inverse; /* get the first word with at least one bit not set */ - for (int i = 0; i < RT_BM_IDX(RT_SLOT_IDX_LIMIT); i++) + for (int i = 0; i < RT_BM_IDX(RT_FANOUT_48_MAX); i++) { w = n48->isset[i]; if (w < ~((bitmapword) 0)) @@ -2771,7 +2774,7 @@ RT_DUMP_NODE(RT_NODE * node) } fprintf(stderr, "isset-bitmap: "); - for (int i = 0; i < (RT_SLOT_IDX_LIMIT / BITS_PER_BYTE); i++) + for (int i = 0; i < (RT_FANOUT_48_MAX / BITS_PER_BYTE); i++) { fprintf(stderr, "%s%x", sep, ((uint8 *) n48->isset)[i]); sep = " "; @@ -2802,7 +2805,7 @@ RT_DUMP_NODE(RT_NODE * node) char *sep = ""; fprintf(stderr, "isset-bitmap: "); - for (int i = 0; i < (RT_SLOT_IDX_LIMIT / BITS_PER_BYTE); i++) + for (int i = 0; i < (RT_FANOUT_256 / BITS_PER_BYTE); i++) { fprintf(stderr, "%s%x", sep, ((uint8 *) n256->isset)[i]); sep = " "; @@ -2860,7 +2863,6 @@ RT_DUMP_NODE(RT_NODE * node) #undef RT_NODE_MUST_GROW #undef RT_NODE_KIND_COUNT #undef RT_SIZE_CLASS_COUNT -#undef RT_SLOT_IDX_LIMIT #undef RT_INVALID_SLOT_IDX #undef RT_SLAB_BLOCK_SIZE #undef RT_RADIX_TREE_MAGIC