diff --git a/src/backend/lib/radixtree.c b/src/backend/lib/radixtree.c index bf87f932fd..2bb04eba86 100644 --- a/src/backend/lib/radixtree.c +++ b/src/backend/lib/radixtree.c @@ -16,6 +16,11 @@ * * The key is a 64-bit unsigned integer and the value is a Datum. Both internal * nodes and leaf nodes have the identical structure. For internal tree nodes, +It might worth mentioning: +- the paper refers to this technique as "Multi-value leaves" +- we chose it (I assume) for simplicity and to avoid an additional pointer traversal +- it is the reason this code currently does not support variable-length keys. + * shift > 0, store the pointer to its child node as the value. The leaf nodes, * shift == 0, also have the Datum value that is specified by the user. * @@ -24,6 +29,7 @@ * Interface * --------- * +*_search belongs here too. * radix_tree_create - Create a new, empty radix tree * radix_tree_free - Free the radix tree * radix_tree_insert - Insert a key-value pair @@ -58,12 +64,18 @@ #include /* AVX2 intrinsics */ #endif +// The name prefixes are a bit long, to shorten, maybe s/radix_tree_/rt_/ ? +// ...and same for capitalized macros -> RT_ + /* The number of bits encoded in one tree level */ +// terminology: this is not fanout, it's "span" -- ART has variable fanout (the different node types) +// maybe BITS_PER_BYTE since the entire code assumes that chunks are byte-addressable #define RADIX_TREE_NODE_FANOUT 8 /* The number of maximum slots in the node, used in node-256 */ #define RADIX_TREE_NODE_MAX_SLOTS (1 << RADIX_TREE_NODE_FANOUT) +// maybe call them "nodes indexed by array lookups" -- the actual size is unimportant and could change /* * Return the number of bits required to represent nslots slots, used * in node-128 and node-256. @@ -84,7 +96,9 @@ ((uint8) (((key) >> (shift)) & RADIX_TREE_CHUNK_MASK)) /* Mapping from the value to the bit in is-set bitmap in the node-128 and node-256 */ +// these macros assume we're addressing bytes, so maybe BITS_PER_BYTE instead of span (here referred to as fanout)? #define NODE_BITMAP_BYTE(v) ((v) / RADIX_TREE_NODE_FANOUT) +// Should this be UINT64CONST? #define NODE_BITMAP_BIT(v) (UINT64_C(1) << ((v) % RADIX_TREE_NODE_FANOUT)) /* Enum used radix_tree_node_search() */ @@ -132,6 +146,7 @@ typedef struct radix_tree_node } radix_tree_node; /* Macros for radix tree nodes */ +// not sure why are we doing casts here? #define IS_LEAF_NODE(n) (((radix_tree_node *) (n))->shift == 0) #define IS_EMPTY_NODE(n) (((radix_tree_node *) (n))->count == 0) #define NODE_HAS_FREE_SLOT(n) \ @@ -161,11 +176,14 @@ typedef struct radix_tree_node_32 Datum slots[32]; } radix_tree_node_32; +// unnecessary symbol #define RADIX_TREE_NODE_128_BITS RADIX_TREE_NODE_NSLOTS_BITS(128) typedef struct radix_tree_node_128 { radix_tree_node n; +// maybe use 0xFF for INVALID_IDX ? then we can use 0-indexing +// and if we do that, do we need isset? on creation, we can just memset slot_idx to INVALID_IDX /* * The index of slots for each fanout. 0 means unused whereas slots is * 0-indexed. So we can get the slot of the chunk C by slots[C] - 1. @@ -178,6 +196,7 @@ typedef struct radix_tree_node_128 Datum slots[128]; } radix_tree_node_128; +// unnecessary symbol #define RADIX_TREE_NODE_MAX_BITS RADIX_TREE_NODE_NSLOTS_BITS(RADIX_TREE_NODE_MAX_SLOTS) typedef struct radix_tree_node_256 { @@ -205,6 +224,7 @@ static radix_tree_node_info_elem radix_tree_node_info[] = {"radix tree node 256", 256, sizeof(radix_tree_node_256)}, }; +// this comment is about a data structure, but talks about code somewhere else /* * As we descend a radix tree, we push the node to the stack. The stack is used * at deletion. @@ -262,6 +282,7 @@ struct radix_tree static radix_tree_node *radix_tree_node_grow(radix_tree *tree, radix_tree_node *parent, radix_tree_node *node, uint64 key); +// maybe _node_find_child or _get_child because "search child" implies to me that we're searching within the child. static bool radix_tree_node_search_child(radix_tree_node *node, radix_tree_node **child_p, uint64 key); static bool radix_tree_node_search(radix_tree_node *node, Datum **slot_p, uint64 key, @@ -289,14 +310,19 @@ static void radix_tree_verify_node(radix_tree_node *node); static inline int node_32_search_eq(radix_tree_node_32 *node, uint8 chunk) { +// If we use SSE intrinsics on Windows, this code might be still be slow (see below), +// so also guard with HAVE__BUILTIN_CTZ #ifdef __AVX2__ __m256i _key = _mm256_set1_epi8(chunk); __m256i _data = _mm256_loadu_si256((__m256i_u *) node->chunks); __m256i _cmp = _mm256_cmpeq_epi8(_key, _data); uint32 bitfield = _mm256_movemask_epi8(_cmp); +// bitfield is uint32, so we don't need UINT64_C bitfield &= ((UINT64_C(1) << node->n.count) - 1); +// To make this portable, should be pg_rightmost_one_pos32(). +// Future TODO: This is slow on Windows, until will need to add the correct interfaces to pg_bitutils.h. return (bitfield) ? __builtin_ctz(bitfield) : -1; #else @@ -313,6 +339,7 @@ node_32_search_eq(radix_tree_node_32 *node, uint8 chunk) #endif /* __AVX2__ */ } +// copy-paste error: search_chunk_array_16_eq /* * This is a bit more complicated than search_chunk_array_16_eq(), because * until recently no unsigned uint8 comparison instruction existed on x86. So @@ -346,6 +373,7 @@ node_32_search_le(radix_tree_node_32 *node, uint8 chunk) #endif /* __AVX2__ */ } +// see 0xFF idea above /* Does the given chunk in the node has the value? */ static inline bool node_128_is_chunk_used(radix_tree_node_128 *node, uint8 chunk) @@ -367,6 +395,8 @@ node_128_set(radix_tree_node_128 *node, uint8 chunk, Datum val) int slotpos = 0; /* Search an unused slot */ + // this could be slow - maybe iterate over the bytes and if the byte < 0xFF then check each bit + // while (node_128_is_slot_used(node, slotpos)) slotpos++; @@ -516,6 +546,7 @@ radix_tree_extend(radix_tree *tree, uint64 key) max_shift = key_get_shift(key); + // why do we need the "max height" and not just one more? /* Grow tree from 'shift' to 'max_shift' */ while (shift <= max_shift) { @@ -752,6 +783,7 @@ radix_tree_node_insert_val(radix_tree *tree, radix_tree_node *parent, memmove(&(n4->chunks[idx + 1]), &(n4->chunks[idx]), sizeof(uint8) * (n4->n.count - idx)); memmove(&(n4->slots[idx + 1]), &(n4->slots[idx]), + // sizeof(Datum) ? sizeof(radix_tree_node *) * (n4->n.count - idx)); }