From 21bda3f9707db1bfab74fe3a7a3a6e1e88df5ee4 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 11 Mar 2024 13:21:10 +0700 Subject: [PATCH v6901 5/5] WIP: 3 embeddable TIDs --- src/backend/access/common/tidstore.c | 87 ++++++++++++++++--- src/include/lib/radixtree.h | 19 ++++ .../test_tidstore/expected/test_tidstore.out | 31 ++----- .../test_tidstore/sql/test_tidstore.sql | 2 +- 4 files changed, 100 insertions(+), 39 deletions(-) diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c index dcdbd12c2e..8c04e7734a 100644 --- a/src/backend/access/common/tidstore.c +++ b/src/backend/access/common/tidstore.c @@ -47,7 +47,25 @@ */ typedef struct BlocktableEntry { - uint16 nwords; + union + { + struct + { +#ifndef WORDS_BIGENDIAN + uint8 flags; + int8 nwords; +#endif + + OffsetNumber full_offsets[3]; + +#ifdef WORDS_BIGENDIAN + int8 nwords; + uint8 flags; +#endif + } ; + uintptr_t ptr; + } header; + bitmapword words[FLEXIBLE_ARRAY_MEMBER]; } BlocktableEntry; #define MaxBlocktableEntrySize \ @@ -61,7 +79,8 @@ typedef struct BlocktableEntry #define RT_VALUE_TYPE BlocktableEntry #define RT_VARLEN_VALUE_SIZE(page) \ (offsetof(BlocktableEntry, words) + \ - sizeof(bitmapword) * (page)->nwords) + sizeof(bitmapword) * (page)->header.nwords) +#define RT_RUNTIME_EMBEDDABLE_VALUE #include "lib/radixtree.h" #define RT_PREFIX shared_rt @@ -72,7 +91,8 @@ typedef struct BlocktableEntry #define RT_VALUE_TYPE BlocktableEntry #define RT_VARLEN_VALUE_SIZE(page) \ (offsetof(BlocktableEntry, words) + \ - sizeof(bitmapword) * (page)->nwords) + sizeof(bitmapword) * (page)->header.nwords) +#define RT_RUNTIME_EMBEDDABLE_VALUE #include "lib/radixtree.h" /* Per-backend state for a TidStore */ @@ -307,7 +327,17 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, bool found PG_USED_FOR_ASSERTS_ONLY; Assert(num_offsets > 0); + memset(page, 0, sizeof(BlocktableEntry)); + if (num_offsets <= 3) + { + for (int i = 0; i < num_offsets; i++) + page->header.full_offsets[i] = offsets[i]; + + page->header.nwords = 0; + } + else + { for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD; wordnum <= WORDNUM(offsets[num_offsets - 1]); wordnum++, next_word_threshold += BITS_PER_BITMAPWORD) @@ -333,8 +363,9 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, page->words[wordnum] = word; } - page->nwords = wordnum; - Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1])); + page->header.nwords = wordnum; + Assert(page->header.nwords == WORDS_PER_PAGE(offsets[num_offsets - 1])); + } if (TidStoreIsShared(ts)) found = shared_rt_set(ts->tree.shared, blkno, page); @@ -370,17 +401,37 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid) /* no entry for the blk */ if (page == NULL) - return false; + { + ret = false; + goto finish; + } wordnum = WORDNUM(off); bitnum = BITNUM(off); - /* no bitmap for the off */ - if (wordnum >= page->nwords) - return false; - - ret = (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0; + if (page->header.nwords == 0) + { + /* we have offsets in the header */ + for (int i = 0; i < lengthof(page->header.full_offsets); i++) + { + if (page->header.full_offsets[i] == off) + { + ret = true; + goto finish; + } + } + ret = false; + } + else + { + /* No bitmap for the off */ + if (wordnum >= page->header.nwords) + ret = false; + else + ret = (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0; + } +finish: #ifdef TIDSTORE_DEBUG if (!TidStoreIsShared(ts)) { @@ -388,6 +439,7 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid) Assert(ret == ret_debug); } #endif + return ret; } @@ -504,7 +556,18 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlocktableEntry *page) TidStoreIterResult *result = (&iter->output); int wordnum; - for (wordnum = 0; wordnum < page->nwords; wordnum++) + if (page->header.nwords == 0) + { + /* we have offsets in the header */ + for (int i = 0; i < 3; i++) + { + if (OffsetNumberIsValid(page->header.full_offsets[i])) + result->offsets[result->num_offsets++] = page->header.full_offsets[i]; + } + return; + } + + for (wordnum = 0; wordnum < page->header.nwords; wordnum++) { bitmapword w = page->words[wordnum]; diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h index b8ad51c14d..108f21aa08 100644 --- a/src/include/lib/radixtree.h +++ b/src/include/lib/radixtree.h @@ -437,7 +437,13 @@ static inline bool RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p) { #ifdef RT_VARLEN_VALUE_SIZE + +#ifdef RT_RUNTIME_EMBEDDABLE_VALUE + return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC); +#else return false; +#endif + #else return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC); #endif @@ -451,7 +457,14 @@ static inline bool RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child) { #ifdef RT_VARLEN_VALUE_SIZE + +#ifdef RT_RUNTIME_EMBEDDABLE_VALUE + /* check for pointer tag */ + return ((uintptr_t) child) & 1; +#else return false; +#endif + #else return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC); #endif @@ -1727,6 +1740,11 @@ have_slot: { /* store value directly in child pointer slot */ memcpy(slot, value_p, value_sz); + +#ifdef RT_RUNTIME_EMBEDDABLE_VALUE + /* tag child pointer */ + *((uintptr_t *) slot) |= 1; +#endif } else { @@ -2878,6 +2896,7 @@ RT_DUMP_NODE(RT_NODE * node) #undef RT_DEFINE #undef RT_VALUE_TYPE #undef RT_VARLEN_VALUE_SIZE +#undef RT_RUNTIME_EMBEDDABLE_VALUE #undef RT_SHMEM #undef RT_USE_DELETE #undef RT_DEBUG diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out index 5e0517f3c3..054cd57ad2 100644 --- a/src/test/modules/test_tidstore/expected/test_tidstore.out +++ b/src/test/modules/test_tidstore/expected/test_tidstore.out @@ -51,7 +51,7 @@ WITH blocks (blk) AS( VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno) ), offsets (off) AS ( -VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) +VALUES (1), (2)--, (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) ) SELECT row_number() over(ORDER BY blk), tidstore_set_block_offsets(blk, array_agg(offsets.off)::int2[]) @@ -68,19 +68,13 @@ SELECT row_number() over(ORDER BY blk), -- Lookup test and dump (sorted) tids. SELECT lookup_test(); - lookup_test ------------------- + lookup_test +---------------- (0,1) (0,2) - (0,256) - (0,511) - (0,512) (4294967295,1) (4294967295,2) - (4294967295,256) - (4294967295,511) - (4294967295,512) -(10 rows) +(4 rows) SELECT tidstore_is_full(); tidstore_is_full @@ -93,30 +87,15 @@ SELECT tidstore_dump_tids(); -------------------- (0,1) (0,2) - (0,256) - (0,511) - (0,512) (1,1) (1,2) - (1,256) - (1,511) - (1,512) (2147483647,1) (2147483647,2) - (2147483647,256) - (2147483647,511) - (2147483647,512) (4294967294,1) (4294967294,2) - (4294967294,256) - (4294967294,511) - (4294967294,512) (4294967295,1) (4294967295,2) - (4294967295,256) - (4294967295,511) - (4294967295,512) -(25 rows) +(10 rows) -- cleanup SELECT tidstore_destroy(); diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql index 4e626b74e3..b29c6e797e 100644 --- a/src/test/modules/test_tidstore/sql/test_tidstore.sql +++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql @@ -41,7 +41,7 @@ WITH blocks (blk) AS( VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno) ), offsets (off) AS ( -VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) +VALUES (1), (2)--, (:maxoffset / 2), (:maxoffset - 1), (:maxoffset) ) SELECT row_number() over(ORDER BY blk), tidstore_set_block_offsets(blk, array_agg(offsets.off)::int2[]) -- 2.44.0