From 2a8048beb4623f7b0d98763658252a74619cd887 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 5 Jan 2024 13:00:35 +1300 Subject: [PATCH v3] Fix REALLOCATE_BITMAPSETS code 7d58f2342 added a compile-time option to have bitmapset.c reallocate the set before returning when a set is modified. That commit failed to do its job in various cases and returned the input set when it shouldn't have in these cases. This commit also adds some documentation about what REALLOCATE_BITMAPSETS is for. This is important as future functions that go inside bitmapset.c need to know if they need to do anything special when this compile-time option is defined. Also, between 71a3e8c43 and 7d58f2342 some Asserts seem to have become duplicated. Here we tidy these up. Rather than having the Assert check each aspect of what makes a set invalid, this commit introduces a helper function which returns false when a set is invalid. Have the Asserts use this instead. Here we also make a pass on improving various comments in bitmapset.c. Various comments talked about the input sets being "recycled" which could be interpreted to mean that the output set will always point to the same memory as the given input parameter. Here we try to make it clear that this must not be relied upon and that callers must ensure that all references to a given set are updated on each modification. In passing, improve comments for bms_union(), bms_intersect() and bms_difference() to detail what they do. I (David) have too often had to remind myself by reading the code each time to find out if I need, for example, to use bms_union() or bms_join(). Author: Richard Guo, David Rowley Discussion: https://postgr.es/m/CAMbWs4-djy9qYux2gZrtmxA0StrYXJjvB-oqLxn-d7J88t=PQQ@mail.gmail.com --- src/backend/nodes/bitmapset.c | 313 ++++++++++++++++++++-------------- 1 file changed, 189 insertions(+), 124 deletions(-) diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index f4b61085be..d9b9453270 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -16,6 +16,18 @@ * containing only a single word (likely the majority of them) this halves the * number of loop condition checks. * + * Callers must ensure that the set returned by functions in this file which + * adjust the members of an existing set is assigned to all pointers pointing + * to that existing set. No guarantees are made that we'll ever modify the + * existing set in-place and return it. + * + * To help find bugs caused by callers failing to record the return value of + * the function which manipulates an existing set, we support building with + * REALLOCATE_BITMAPSETS. This results in the set being reallocated each time + * the set is altered and the existing being pfreed. This is useful as if any + * references still exist to the old set, we're more likely to notice as + * any users of the old set will be accessing pfree'd memory. This option is + * only intended to be used for debugging. * * Copyright (c) 2003-2024, PostgreSQL Global Development Group * @@ -72,6 +84,49 @@ #error "invalid BITS_PER_BITMAPWORD" #endif +#ifdef USE_ASSERT_CHECKING +/* + * bms_is_valid_set - for cassert builds to check for valid sets + */ +static bool +bms_is_valid_set(const Bitmapset *a) +{ + /* NULL is the correct representation of an empty set */ + if (a == NULL) + return true; + + /* check the node tag is set correctly. pfree'd pointer, maybe? */ + if (!IsA(a, Bitmapset)) + return false; + + /* trailing zero words are not allowed */ + if (a->words[a->nwords - 1] == 0) + return false; + + return true; +} +#endif + +#ifdef REALLOCATE_BITMAPSETS +/* + * bms_copy_and_free + * Only required in REALLOCATE_BITMAPSETS builds. Provide a simple way + * to return a freshly allocated set and pfree the original. + * + * Note: callers which accept multiple sets must be careful when calling this + * function to clone one parameter as other parameters may point to the same + * set. A good option is to call this just before returning the resulting + * set. + */ +static Bitmapset * +bms_copy_and_free(Bitmapset *a) +{ + Bitmapset *c = bms_copy(a); + + bms_free(a); + return c; +} +#endif /* * bms_copy - make a palloc'd copy of a bitmapset @@ -82,9 +137,11 @@ bms_copy(const Bitmapset *a) Bitmapset *result; size_t size; + Assert(bms_is_valid_set(a)); + if (a == NULL) return NULL; - Assert(IsA(a, Bitmapset)); + size = BITMAPSET_SIZE(a->nwords); result = (Bitmapset *) palloc(size); memcpy(result, a, size); @@ -99,8 +156,8 @@ bms_equal(const Bitmapset *a, const Bitmapset *b) { int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -140,8 +197,8 @@ bms_compare(const Bitmapset *a, const Bitmapset *b) { int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -206,7 +263,8 @@ bms_free(Bitmapset *a) /* - * bms_union - set union + * bms_union - create and return a new set containing all members from both + * input sets. Both inputs are left unmodified. */ Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b) @@ -216,8 +274,8 @@ bms_union(const Bitmapset *a, const Bitmapset *b) int otherlen; int i; - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -246,7 +304,8 @@ bms_union(const Bitmapset *a, const Bitmapset *b) } /* - * bms_intersect - set intersection + * bms_intersect - create and return a new set containing members which both + * input sets have in common. Both inputs are left unmodified. */ Bitmapset * bms_intersect(const Bitmapset *a, const Bitmapset *b) @@ -257,8 +316,8 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) int resultlen; int i; - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL || b == NULL) @@ -299,7 +358,8 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b) } /* - * bms_difference - set difference (ie, A without members of B) + * bms_difference - create and return a new set containing all the members of + * 'a' without the members of 'b'. */ Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b) @@ -307,8 +367,8 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) Bitmapset *result; int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -316,8 +376,6 @@ bms_difference(const Bitmapset *a, const Bitmapset *b) if (b == NULL) return bms_copy(a); - Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset)); - /* * In Postgres' usage, an empty result is a very common case, so it's * worth optimizing for that by testing bms_nonempty_difference(). This @@ -374,8 +432,8 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b) { int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -383,8 +441,6 @@ bms_is_subset(const Bitmapset *a, const Bitmapset *b) if (b == NULL) return false; - Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset)); - /* 'a' can't be a subset of 'b' if it contains more words */ if (a->nwords > b->nwords) return false; @@ -411,8 +467,8 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) int shortlen; int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -424,8 +480,6 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b) if (b == NULL) return BMS_SUBSET2; - Assert(IsA(a, Bitmapset) && IsA(b, Bitmapset)); - /* Check common words */ result = BMS_EQUAL; /* status so far */ shortlen = Min(a->nwords, b->nwords); @@ -477,14 +531,14 @@ bms_is_member(int x, const Bitmapset *a) int wordnum, bitnum; + Assert(bms_is_valid_set(a)); + /* XXX better to just return false for x<0 ? */ if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); if (a == NULL) return false; - Assert(IsA(a, Bitmapset)); - wordnum = WORDNUM(x); bitnum = BITNUM(x); if (wordnum >= a->nwords) @@ -509,12 +563,12 @@ bms_member_index(Bitmapset *a, int x) int result = 0; bitmapword mask; + Assert(bms_is_valid_set(a)); + /* return -1 if not a member of the bitmap */ if (!bms_is_member(x, a)) return -1; - Assert(IsA(a, Bitmapset)); - wordnum = WORDNUM(x); bitnum = BITNUM(x); @@ -549,8 +603,8 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b) int shortlen; int i; - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL || b == NULL) @@ -576,7 +630,7 @@ bms_overlap_list(const Bitmapset *a, const List *b) int wordnum, bitnum; - Assert(a == NULL || IsA(a, Bitmapset)); + Assert(bms_is_valid_set(a)); if (a == NULL || b == NIL) return false; @@ -607,8 +661,8 @@ bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b) { int i; - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -640,11 +694,11 @@ bms_singleton_member(const Bitmapset *a) int nwords; int wordnum; + Assert(bms_is_valid_set(a)); + if (a == NULL) elog(ERROR, "bitmapset is empty"); - Assert(IsA(a, Bitmapset)); - nwords = a->nwords; wordnum = 0; do @@ -683,9 +737,11 @@ bms_get_singleton_member(const Bitmapset *a, int *member) int nwords; int wordnum; + Assert(bms_is_valid_set(a)); + if (a == NULL) return false; - Assert(IsA(a, Bitmapset)); + nwords = a->nwords; wordnum = 0; do @@ -717,9 +773,11 @@ bms_num_members(const Bitmapset *a) int nwords; int wordnum; + Assert(bms_is_valid_set(a)); + if (a == NULL) return 0; - Assert(IsA(a, Bitmapset)); + nwords = a->nwords; wordnum = 0; do @@ -745,9 +803,11 @@ bms_membership(const Bitmapset *a) int nwords; int wordnum; + Assert(bms_is_valid_set(a)); + if (a == NULL) return BMS_EMPTY_SET; - Assert(IsA(a, Bitmapset)); + nwords = a->nwords; wordnum = 0; do @@ -778,7 +838,7 @@ bms_membership(const Bitmapset *a) /* * bms_add_member - add a specified member to set * - * Input set is modified or recycled! + * 'a' is recycled when possible. */ Bitmapset * bms_add_member(Bitmapset *a, int x) @@ -786,11 +846,13 @@ bms_add_member(Bitmapset *a, int x) int wordnum, bitnum; + Assert(bms_is_valid_set(a)); + if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); if (a == NULL) return bms_make_singleton(x); - Assert(IsA(a, Bitmapset)); + wordnum = WORDNUM(x); bitnum = BITNUM(x); @@ -799,15 +861,8 @@ bms_add_member(Bitmapset *a, int x) { int oldnwords = a->nwords; int i; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; - a = (Bitmapset *) palloc(BITMAPSET_SIZE(wordnum + 1)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); -#else a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1)); -#endif a->nwords = wordnum + 1; /* zero out the enlarged portion */ i = oldnwords; @@ -816,18 +871,18 @@ bms_add_member(Bitmapset *a, int x) a->words[i] = 0; } while (++i < a->nwords); } + + a->words[wordnum] |= ((bitmapword) 1 << bitnum); + #ifdef REALLOCATE_BITMAPSETS - else - { - Bitmapset *tmp = a; - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); - } + /* + * There's no guarantee that the repalloc returned a new pointer, so copy + * and free unconditionally here. + */ + a = bms_copy_and_free(a); #endif - a->words[wordnum] |= ((bitmapword) 1 << bitnum); return a; } @@ -836,31 +891,26 @@ bms_add_member(Bitmapset *a, int x) * * No error if x is not currently a member of set * - * Input set is modified in-place! + * 'a' is recycled when possible. */ Bitmapset * bms_del_member(Bitmapset *a, int x) { int wordnum, bitnum; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; -#endif + + Assert(bms_is_valid_set(a)); if (x < 0) elog(ERROR, "negative bitmapset member not allowed"); if (a == NULL) return NULL; - Assert(IsA(a, Bitmapset)); - wordnum = WORDNUM(x); bitnum = BITNUM(x); #ifdef REALLOCATE_BITMAPSETS - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); + a = bms_copy_and_free(a); #endif /* member can't exist. Return 'a' unmodified */ @@ -890,7 +940,7 @@ bms_del_member(Bitmapset *a, int x) } /* - * bms_add_members - like bms_union, but left input is recycled + * bms_add_members - like bms_union, but left input is recycled when possible */ Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b) @@ -900,14 +950,20 @@ bms_add_members(Bitmapset *a, const Bitmapset *b) int otherlen; int i; - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) return bms_copy(b); if (b == NULL) + { +#ifdef REALLOCATE_BITMAPSETS + a = bms_copy_and_free(a); +#endif + return a; + } /* Identify shorter and longer input; copy the longer one if needed */ if (a->nwords < b->nwords) { @@ -916,13 +972,6 @@ bms_add_members(Bitmapset *a, const Bitmapset *b) } else { -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; - - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); -#endif result = a; other = b; } @@ -935,6 +984,11 @@ bms_add_members(Bitmapset *a, const Bitmapset *b) } while (++i < otherlen); if (result != a) pfree(a); +#ifdef REALLOCATE_BITMAPSETS + else + result = bms_copy_and_free(result); +#endif + return result; } @@ -955,11 +1009,17 @@ bms_add_range(Bitmapset *a, int lower, int upper) ushiftbits, wordnum; - Assert(a == NULL || IsA(a, Bitmapset)); + Assert(bms_is_valid_set(a)); /* do nothing if nothing is called for, without further checking */ if (upper < lower) + { +#ifdef REALLOCATE_BITMAPSETS + a = bms_copy_and_free(a); +#endif + return a; + } if (lower < 0) elog(ERROR, "negative bitmapset member not allowed"); @@ -975,16 +1035,9 @@ bms_add_range(Bitmapset *a, int lower, int upper) { int oldnwords = a->nwords; int i; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; - a = (Bitmapset *) palloc(BITMAPSET_SIZE(uwordnum + 1)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); -#else /* ensure we have enough words to store the upper bit */ a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1)); -#endif a->nwords = uwordnum + 1; /* zero out the enlarged portion */ i = oldnwords; @@ -1021,11 +1074,21 @@ bms_add_range(Bitmapset *a, int lower, int upper) a->words[uwordnum] |= (~(bitmapword) 0) >> ushiftbits; } +#ifdef REALLOCATE_BITMAPSETS + + /* + * There's no guarantee that the repalloc returned a new pointer, so copy + * and free unconditionally here. + */ + a = bms_copy_and_free(a); +#endif + return a; } /* - * bms_int_members - like bms_intersect, but left input is recycled + * bms_int_members - like bms_intersect, but left input is recycled when + * possible */ Bitmapset * bms_int_members(Bitmapset *a, const Bitmapset *b) @@ -1033,15 +1096,9 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) int lastnonzero; int shortlen; int i; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; -#endif - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); - - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) @@ -1052,12 +1109,6 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) return NULL; } -#ifdef REALLOCATE_BITMAPSETS - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); -#endif - /* Intersect b into a; we need never copy */ shortlen = Min(a->nwords, b->nwords); lastnonzero = -1; @@ -1079,35 +1130,38 @@ bms_int_members(Bitmapset *a, const Bitmapset *b) /* get rid of trailing zero words */ a->nwords = lastnonzero + 1; + +#ifdef REALLOCATE_BITMAPSETS + a = bms_copy_and_free(a); +#endif + return a; } /* - * bms_del_members - like bms_difference, but left input is recycled + * bms_del_members - delete members in 'a' that are set in 'b'. 'a' is + * recycled when possible. */ Bitmapset * bms_del_members(Bitmapset *a, const Bitmapset *b) { int i; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; -#endif - Assert(a == NULL || (IsA(a, Bitmapset) && a->words[a->nwords - 1] != 0)); - Assert(b == NULL || (IsA(b, Bitmapset) && b->words[b->nwords - 1] != 0)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) return NULL; if (b == NULL) - return a; - + { #ifdef REALLOCATE_BITMAPSETS - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); + a = bms_copy_and_free(a); #endif + return a; + } + /* Remove b's bits from a; we need never copy */ if (a->nwords > b->nwords) { @@ -1147,11 +1201,15 @@ bms_del_members(Bitmapset *a, const Bitmapset *b) a->nwords = lastnonzero + 1; } +#ifdef REALLOCATE_BITMAPSETS + a = bms_copy_and_free(a); +#endif + return a; } /* - * bms_join - like bms_union, but *both* inputs are recycled + * bms_join - like bms_union, but *either* input *may* be recycled */ Bitmapset * bms_join(Bitmapset *a, Bitmapset *b) @@ -1160,28 +1218,28 @@ bms_join(Bitmapset *a, Bitmapset *b) Bitmapset *other; int otherlen; int i; -#ifdef REALLOCATE_BITMAPSETS - Bitmapset *tmp = a; -#endif - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); - - Assert(a == NULL || IsA(a, Bitmapset)); - Assert(b == NULL || IsA(b, Bitmapset)); + Assert(bms_is_valid_set(a)); + Assert(bms_is_valid_set(b)); /* Handle cases where either input is NULL */ if (a == NULL) + { +#ifdef REALLOCATE_BITMAPSETS + b = bms_copy_and_free(b); +#endif + return b; + } if (b == NULL) - return a; - + { #ifdef REALLOCATE_BITMAPSETS - a = (Bitmapset *) palloc(BITMAPSET_SIZE(tmp->nwords)); - memcpy(a, tmp, BITMAPSET_SIZE(tmp->nwords)); - pfree(tmp); + a = bms_copy_and_free(a); #endif + return a; + } + /* Identify shorter and longer input; use longer one as result */ if (a->nwords < b->nwords) { @@ -1202,6 +1260,11 @@ bms_join(Bitmapset *a, Bitmapset *b) } while (++i < otherlen); if (other != result) /* pure paranoia */ pfree(other); + +#ifdef REALLOCATE_BITMAPSETS + result = bms_copy_and_free(result); +#endif + return result; } @@ -1231,7 +1294,7 @@ bms_next_member(const Bitmapset *a, int prevbit) int wordnum; bitmapword mask; - Assert(a == NULL || IsA(a, Bitmapset)); + Assert(bms_is_valid_set(a)); if (a == NULL) return -2; @@ -1292,7 +1355,7 @@ bms_prev_member(const Bitmapset *a, int prevbit) int ushiftbits; bitmapword mask; - Assert(a == NULL || IsA(a, Bitmapset)); + Assert(bms_is_valid_set(a)); /* * If set is NULL or if there are no more bits to the right then we've @@ -1337,6 +1400,8 @@ bms_prev_member(const Bitmapset *a, int prevbit) uint32 bms_hash_value(const Bitmapset *a) { + Assert(bms_is_valid_set(a)); + if (a == NULL) return 0; /* All empty sets hash to 0 */ return DatumGetUInt32(hash_any((const unsigned char *) a->words, -- 2.40.1