diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml index a1838e62661676203c42ca7eca61eab33e6534d9..26d73ce65240e71f1367886bd67fe546e7240675 100644 *** a/doc/src/sgml/gin.sgml --- b/doc/src/sgml/gin.sgml *************** *** 12,28 **** Introduction ! GIN stands for Generalized Inverted Index. It is ! an index structure storing a set of (key, posting list) pairs, where ! a posting list is a set of rows in which the key occurs. Each ! indexed value can contain many keys, so the same row ID can appear in ! multiple posting lists. ! It is generalized in the sense that a GIN index ! does not need to be aware of the operation that it accelerates. ! Instead, it uses custom strategies defined for particular data types. --- 12,49 ---- Introduction ! GIN stands for Generalized Inverted Index. ! GIN is designed for handling cases where the items ! to be indexed are composite values, and the queries to be handled by ! the index need to search for element values that appear within ! the composite items. For example, the items could be documents, ! and the queries could be searches for documents containing specific words. ! We use the word item to refer to a composite value that ! is to be indexed, and the word key to refer to an element ! value. GIN always stores and searches for keys, ! not item values per se. ! ! ! ! A GIN index stores a set of (key, posting list) pairs, ! where a posting list is a set of row IDs in which the key ! occurs. The same row ID can appear in multiple posting lists, since ! an item can contain more than one key. Each key value is stored only ! once, so a GIN index is very compact for cases ! where the same key appears many times. ! ! ! ! GIN is generalized in the sense that the ! GIN access method code does not need to know the ! specific operations that it accelerates. ! Instead, it uses custom strategies defined for particular data types. ! The strategy defines how keys are extracted from indexed items and ! query conditions, and how to determine whether a row that contains ! some of the key values in a query actually satisfies the query. *************** *** 54,60 **** All it takes to get a GIN access method working is to implement four (or five) user-defined methods, which define the behavior of ! keys in the tree and the relationships between keys, indexed values, and indexable queries. In short, GIN combines extensibility with generality, code reuse, and a clean interface. --- 75,81 ---- All it takes to get a GIN access method working is to implement four (or five) user-defined methods, which define the behavior of ! keys in the tree and the relationships between keys, indexed items, and indexable queries. In short, GIN combines extensibility with generality, code reuse, and a clean interface. *************** *** 68,96 **** int compare(Datum a, Datum b) ! Compares keys (not indexed values!) and returns an integer less than zero, zero, or greater than zero, indicating whether the first key is ! less than, equal to, or greater than the second. ! Datum *extractValue(Datum inputValue, int32 *nkeys) ! Returns an array of keys given a value to be indexed. The number of returned keys must be stored into *nkeys. Datum *extractQuery(Datum query, int32 *nkeys, ! StrategyNumber n, bool **pmatch, Pointer **extra_data) ! Returns an array of keys given a value to be queried; that is, query is the value on the right-hand side of an indexable operator whose left-hand side is the indexed column. n is the strategy number of the operator within the --- 89,126 ---- int compare(Datum a, Datum b) ! Compares two keys (not indexed items!) and returns an integer less than zero, zero, or greater than zero, indicating whether the first key is ! less than, equal to, or greater than the second. Null keys are never ! passed to this function. ! Datum *extractValue(Datum itemValue, int32 *nkeys, ! bool **nullFlags) ! Returns a palloc'd array of keys given an item to be indexed. The number of returned keys must be stored into *nkeys. + If any of the keys can be null, also palloc an array of + *nkeys booleans, store its address at + *nullFlags, and set these null flags as needed. + *nullFlags can be left NULL (its initial value) + if all keys are non-null. + The return value can be NULL if the item contains no keys. Datum *extractQuery(Datum query, int32 *nkeys, ! StrategyNumber n, bool **pmatch, Pointer **extra_data, ! bool **nullFlags) ! Returns a palloc'd array of keys given a value to be queried; that is, query is the value on the right-hand side of an indexable operator whose left-hand side is the indexed column. n is the strategy number of the operator within the *************** *** 99,120 **** to consult n to determine the data type of query and the key values that need to be extracted. The number of returned keys must be stored into *nkeys. If the query contains no keys then extractQuery should store 0 or -1 into *nkeys, depending on the ! semantics of the operator. 0 means that every ! value matches the query and a full-index scan should be ! performed (but see ). ! -1 means that nothing can match the query, and ! so the index scan can be skipped entirely. pmatch is an output argument for use when partial match is supported. To use it, extractQuery must allocate ! an array of *nkeys Booleans and store its address at *pmatch. Each element of the array should be set to TRUE if the corresponding key requires partial match, FALSE if not. If *pmatch is set to NULL then GIN assumes partial match is not required. The variable is initialized to NULL before call, so this argument can simply be ignored by operator classes that do not support partial match. extra_data is an output argument that allows extractQuery to pass additional data to the consistent and comparePartial methods. --- 129,165 ---- to consult n to determine the data type of query and the key values that need to be extracted. The number of returned keys must be stored into *nkeys. + If any of the keys can be null, also palloc an array of + *nkeys booleans, store its address at + *nullFlags, and set these null flags as needed. + *nullFlags can be left NULL (its initial value) + if all keys are non-null. + + + If the query contains no keys then extractQuery should store 0 or -1 into *nkeys, depending on the ! semantics of the operator. 0 means that every item potentially ! matches the query, so a full-index scan must be ! performed. -1 means that nothing can match the query, ! and so the index scan can be skipped entirely. (-1 essentially ! promises that the consistent function will return ! false for zero keys.) In either case, the return value can be NULL. ! ! ! pmatch is an output argument for use when partial match is supported. To use it, extractQuery must allocate ! an array of *nkeys booleans and store its address at *pmatch. Each element of the array should be set to TRUE if the corresponding key requires partial match, FALSE if not. If *pmatch is set to NULL then GIN assumes partial match is not required. The variable is initialized to NULL before call, so this argument can simply be ignored by operator classes that do not support partial match. + + + extra_data is an output argument that allows extractQuery to pass additional data to the consistent and comparePartial methods. *************** *** 133,157 **** bool consistent(bool check[], StrategyNumber n, Datum query, ! int32 nkeys, Pointer extra_data[], bool *recheck) ! Returns TRUE if the indexed value satisfies the query operator with ! strategy number n (or might satisfy, if the recheck ! indication is returned). The check array has length nkeys, which is the same as the number of keys previously returned by extractQuery for this query datum. Each element of the ! check array is TRUE if the indexed value contains the corresponding query key, ie, if (check[i] == TRUE) the i-th key of the ! extractQuery result array is present in the indexed value. ! The original query datum (not the extracted key array!) is ! passed in case the consistent method needs to consult it. extra_data is the extra-data array returned by extractQuery, or NULL if none. On success, *recheck should be set to TRUE if the heap tuple needs to be rechecked against the query operator, or FALSE if ! the index test is exact. --- 178,228 ---- bool consistent(bool check[], StrategyNumber n, Datum query, ! int32 nkeys, Pointer extra_data[], bool *recheck, ! Datum queryKeys[], bool nullFlags[]) ! Returns TRUE if an indexed item satisfies the query operator with ! strategy number n (or might satisfy it, if the recheck ! indication is returned). This function does not have direct access ! to the indexed item's value, since GIN does not ! store items explicitly. Rather, what is available is knowledge ! about which key values extracted from the query appear in a given ! heap row. The check array has length nkeys, which is the same as the number of keys previously returned by extractQuery for this query datum. Each element of the ! check array is TRUE if the indexed item contains the corresponding query key, ie, if (check[i] == TRUE) the i-th key of the ! extractQuery result array is present in the indexed item. ! The original query datum is ! passed in case the consistent method needs to consult it, ! and so are the queryKeys[] and nullFlags[] ! arrays previously returned by extractQuery. extra_data is the extra-data array returned by extractQuery, or NULL if none. + + + + When extractQuery returns a null key in + queryKeys[], the corresponding check[] element + is TRUE if the indexed item contains a null key; that is, the + semantics of check[] are like IS NOT DISTINCT + FROM. The consistent function can examine the + corresponding nullFlags[] element if it needs to tell + the difference between a regular value match and a null match. + + + On success, *recheck should be set to TRUE if the heap tuple needs to be rechecked against the query operator, or FALSE if ! the index test is exact. That is, a FALSE return value guarantees ! that the heap row does not match the query; a TRUE return value with ! *recheck set to FALSE guarantees that the heap row does ! match the query; and a TRUE return value with ! *recheck set to TRUE means that the heap row might match ! the query, so it needs to be fetched and rechecked by evaluating the ! query operator directly against the indexed item. *************** *** 166,172 **** Pointer extra_data) ! Compare a partial-match query to an index key. Returns an integer whose sign indicates the result: less than zero means the index key does not match the query, but the index scan should continue; zero means that the index key does match the query; greater than zero --- 237,243 ---- Pointer extra_data) ! Compare a partial-match query key to an index key. Returns an integer whose sign indicates the result: less than zero means the index key does not match the query, but the index scan should continue; zero means that the index key does match the query; greater than zero *************** *** 176,181 **** --- 247,253 ---- semantics are needed to determine when to end the scan. Also, extra_data is the corresponding element of the extra-data array made by extractQuery, or NULL if none. + Null keys are never passed to this function. *************** *** 190,195 **** --- 262,279 ---- for details. + + The actual data types of the various Datum values mentioned + above vary depending on the operator class. The item values passed to + extractValue are always of the operator class's input type, and + all key values must be of the class's STORAGE type. The type of + the query argument passed to extractQuery and + consistent is whatever is specified as the right-hand input + type of the class member operator identified by the strategy number. + This need not be the same as the item type, so long as key values of the + correct type can be extracted from it. + + *************** *** 197,206 **** Internally, a GIN index contains a B-tree index ! constructed over keys, where each key is an element of the indexed value ! (a member of an array, for example) and where each tuple in a leaf page is ! either a pointer to a B-tree over heap pointers (PT, posting tree), or a ! list of heap pointers (PL, posting list) if the list is small enough. --- 281,306 ---- Internally, a GIN index contains a B-tree index ! constructed over keys, where each key is an element of one or more indexed ! items (a member of an array, for example) and where each tuple in a leaf ! page contains either a pointer to a B-tree of heap pointers (a ! posting tree), or a simple list of heap pointers (a posting ! list) when the list is small enough to fit into a single index tuple along ! with the key value. ! ! ! ! As of PostgreSQL 9.1, NULL key values can be ! included in the index. Also, placeholder NULLs are included in the index ! for indexed items that are NULL or contain no keys according to ! extractValue. This allows searches that should find empty ! items to do so. ! ! ! ! Multi-column GIN indexes are implemented by building ! a single B-tree over composite values (column number, key value). The ! key values for different columns can be of different types. *************** *** 210,216 **** Updating a GIN index tends to be slow because of the intrinsic nature of inverted indexes: inserting or updating one heap row can cause many inserts into the index (one for each key extracted ! from the indexed value). As of PostgreSQL 8.4, GIN is capable of postponing much of this work by inserting new tuples into a temporary, unsorted list of pending entries. When the table is vacuumed, or if the pending list becomes too large --- 310,316 ---- Updating a GIN index tends to be slow because of the intrinsic nature of inverted indexes: inserting or updating one heap row can cause many inserts into the index (one for each key extracted ! from the indexed item). As of PostgreSQL 8.4, GIN is capable of postponing much of this work by inserting new tuples into a temporary, unsorted list of pending entries. When the table is vacuumed, or if the pending list becomes too large *************** *** 218,224 **** main GIN data structure using the same bulk insert techniques used during initial index creation. This greatly improves GIN index update speed, even counting the additional ! vacuum overhead. Moreover the overhead can be done by a background process instead of in foreground query processing. --- 318,324 ---- main GIN data structure using the same bulk insert techniques used during initial index creation. This greatly improves GIN index update speed, even counting the additional ! vacuum overhead. Moreover the overhead work can be done by a background process instead of in foreground query processing. *************** *** 252,260 **** The extractQuery method, instead of returning a key value to be matched exactly, returns a key value that is the lower bound of the range to be searched, and sets the pmatch flag true. ! The key range is then searched using the comparePartial ! method. comparePartial must return zero for an actual ! match, less than zero for a non-match that is still within the range to be searched, or greater than zero if the index key is past the range that could match. --- 352,360 ---- The extractQuery method, instead of returning a key value to be matched exactly, returns a key value that is the lower bound of the range to be searched, and sets the pmatch flag true. ! The key range is then scanned using the comparePartial ! method. comparePartial must return zero for a matching ! index key, less than zero for a non-match that is still within the range to be searched, or greater than zero if the index key is past the range that could match. *************** *** 271,277 **** Insertion into a GIN index can be slow ! due to the likelihood of many keys being inserted for each value. So, for bulk insertions into a table it is advisable to drop the GIN index and recreate it after finishing bulk insertion. --- 371,377 ---- Insertion into a GIN index can be slow ! due to the likelihood of many keys being inserted for each item. So, for bulk insertions into a table it is advisable to drop the GIN index and recreate it after finishing bulk insertion. *************** *** 338,346 **** Soft means that the actual number of returned results ! could differ slightly from the specified limit, depending on the query and the quality of the system's random number generator. --- 438,450 ---- Soft means that the actual number of returned results ! could differ somewhat from the specified limit, depending on the query and the quality of the system's random number generator. + + From experience, values in the thousands (e.g., 5000 — 20000) + work well. + *************** *** 351,394 **** Limitations ! GIN doesn't support full index scans. The reason for ! this is that extractValue is allowed to return zero keys, ! as for example might happen with an empty string or empty array. In such ! a case the indexed value will be unrepresented in the index. It is ! therefore impossible for GIN to guarantee that a ! scan of the index can find every row in the table. ! ! ! ! Because of this limitation, when extractQuery returns ! nkeys = 0 to indicate that all values match the query, ! GIN will emit an error. (If there are multiple ANDed ! indexable operators in the query, this happens only if they all return zero ! for nkeys.) ! ! ! ! It is possible for an operator class to circumvent the restriction against ! full index scan. To do that, extractValue must return at least ! one (possibly dummy) key for every indexed value, and ! extractQuery must convert an unrestricted search into ! a partial-match query that will scan the whole index. This is inefficient ! but might be necessary to avoid corner-case failures with operators such ! as LIKE or subset inclusion. ! ! ! ! GIN assumes that indexable operators are strict. ! This means that extractValue will not be called at all on ! a NULL value (so the value will go unindexed), and ! extractQuery will not be called on a NULL comparison ! value either (instead, the query is presumed to be unmatchable). ! A possibly more serious limitation is that GIN cannot ! handle NULL keys — for example, an array containing a NULL cannot ! be handled except by ignoring the NULL. --- 455,473 ---- Limitations ! GIN assumes that indexable operators are strict. This ! means that extractValue will not be called at all on a NULL ! item value (instead, a placeholder index entry is created automatically), ! and extractQuery will not be called on a NULL query ! value either (instead, the query is presumed to be unsatisfiable). Note ! however that NULL key values contained within a non-null composite item ! or query value are supported. ! If extractQuery returns zero keys from a query value, ! and does not report that the query is unsatisfiable, then a full-index scan ! will result. This is fairly inefficient. diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README index 0f634f83d1791cbf4b0007dcd521da9487e93199..b79a5e6e5136861b1f259c1bc7434e7f5b6569dc 100644 *** a/src/backend/access/gin/README --- b/src/backend/access/gin/README *************** is similar to GiST and differs from btre *** 15,37 **** comparison-based operations. An inverted index is an index structure storing a set of (key, posting list) ! pairs, where 'posting list' is a set of documents in which the key occurs. (A text document would usually contain many keys.) The primary goal of Gin indices is support for highly scalable, full-text search in PostgreSQL. ! Gin consists of a B-tree index constructed over entries (ET, entries tree), ! where each entry is an element of the indexed value (element of array, lexeme ! for tsvector) and where each tuple in a leaf page is either a pointer to a ! B-tree over item pointers (PT, posting tree), or a list of item pointers ! (PL, posting list) if the tuple is small enough. ! Note: There is no delete operation for ET. The reason for this is that in ! our experience, the set of distinct words in a large corpus changes very ! rarely. This greatly simplifies the code and concurrency algorithms. ! Gin comes with built-in support for one-dimensional arrays (eg. integer[], ! text[]), but no support for NULL elements. The following operations are ! available: * contains: value_array @> query_array * overlaps: value_array && query_array --- 15,37 ---- comparison-based operations. An inverted index is an index structure storing a set of (key, posting list) ! pairs, where 'posting list' is a set of heap rows in which the key occurs. (A text document would usually contain many keys.) The primary goal of Gin indices is support for highly scalable, full-text search in PostgreSQL. ! A Gin index consists of a B-tree index constructed over key values, ! where each key is an element of some indexed items (element of array, lexeme ! for tsvector) and where each tuple in a leaf page contains either a pointer to ! a B-tree over item pointers (posting tree), or a simple list of item pointers ! (posting list) if the list is small enough. ! Note: There is no delete operation in the key (entry) tree. The reason for ! this is that in our experience, the set of distinct words in a large corpus ! changes very slowly. This greatly simplifies the code and concurrency ! algorithms. ! Core PostgreSQL includes built-in Gin support for one-dimensional arrays ! (eg. integer[], text[]). The following operations are available: * contains: value_array @> query_array * overlaps: value_array && query_array *************** limit). *** 71,77 **** If a non-zero search limit is set, then the returned set is a subset of the whole result set, chosen at random. ! "Soft" means that the actual number of returned results could slightly differ from the specified limit, depending on the query and the quality of the system's random number generator. --- 71,77 ---- If a non-zero search limit is set, then the returned set is a subset of the whole result set, chosen at random. ! "Soft" means that the actual number of returned results could differ from the specified limit, depending on the query and the quality of the system's random number generator. *************** From experience, a value of 'gin_fuzzy_s *** 80,119 **** have no effect for queries returning a result set with less tuples than this number. ! Limitations ! ----------- ! * No support for multicolumn indices ! * Gin doesn't uses scan->kill_prior_tuple & scan->ignore_killed_tuples ! * Gin searches entries only by equality matching. This may be improved in ! future. ! * Gin doesn't support full scans of indices. ! * Gin doesn't index NULL values. ! Open Items ! ---------- ! We appreciate any comments, help and suggestions. ! * Teach optimizer/executor that GIN is intrinsically clustered. i.e., it ! always returns ItemPointer in ascending order. ! * Tweak gincostestimate. TODO ---- Nearest future: ! * Opclasses for all types (no programming, just many catalog changes). Distant future: * Replace B-tree of entries to something like GiST - * Add multicolumn support - * Optimize insert operations (background index insertion) Authors ------- ! All work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov (oleg@sai.msu.su). --- 80,236 ---- have no effect for queries returning a result set with less tuples than this number. ! Index structure ! --------------- ! The "items" that a GIN index indexes are composite values that contain ! zero or more "keys". For example, an item might be an integer array, and ! then the keys would be the individual integer values. The index actually ! stores and searches for the key values, not the items per se. In the ! pg_opclass entry for a GIN opclass, the opcintype is the data type of the ! items, and the opckeytype is the data type of the keys. GIN is optimized ! for cases where items contain many keys and the same key values appear ! in many different items. ! A GIN index contains a metapage, a btree of key entries, and possibly ! "posting tree" pages, which hold the overflow when a key entry acquires ! too many heap tuple pointers to fit in a btree page. Additionally, if the ! fast-update feature is enabled, there can be "list pages" holding "pending" ! key entries that haven't yet been merged into the main btree. The list ! pages have to be scanned linearly when doing a search, so the pending ! entries should be merged into the main btree before there get to be too ! many of them. The advantage of the pending list is that bulk insertion of ! a few thousand entries can be much faster than retail insertion. (The win ! comes mainly from not having to do multiple searches/insertions when the ! same key appears in multiple new heap tuples.) ! Key entries are nominally of the same IndexEntry format as used in other ! index types, but since a leaf key entry typically refers to multiple heap ! tuples, there are significant differences. (See GinFormTuple, which works ! by building a "normal" index tuple and then modifying it.) The points to ! know are: ! * In a single-column index, a key tuple just contains the key datum, but ! in a multi-column index, a key tuple contains the pair (column number, ! key datum) where the column number is stored as an int2. This is needed ! to support different key data types in different columns. This much of ! the tuple is built by index_form_tuple according to the usual rules. ! The column number (if present) can never be null, but the key datum can ! be, in which case a null bitmap is present as usual. (As usual for index ! tuples, the size of the null bitmap is fixed at INDEX_MAX_KEYS.) ! ! * If the key datum is null (ie, IndexTupleHasNulls() is true), then ! just after the nominal index data (ie, at offset IndexInfoFindDataOffset ! or IndexInfoFindDataOffset + sizeof(int2)) there is a byte indicating ! the "category" of the null entry. These are the possible categories: ! 1 = ordinary null key value extracted from an indexable item ! 2 = placeholder for zero-key indexable item ! 3 = placeholder for null indexable item ! Placeholder null entries are inserted into the index because otherwise ! there would be no index entry at all for an empty or null indexable item, ! which would mean that full index scans couldn't be done and various corner ! cases would give wrong answers. The different categories of null entries ! are treated as distinct keys by the btree, but heap itempointers for the ! same category of null entry are merged into one index entry just as happens ! with ordinary key entries. ! ! * In a key entry at the btree leaf level, at the next SHORTALIGN boundary, ! there is an array of zero or more ItemPointers, which store the heap tuple ! TIDs for which the indexable items contain this key. This is called the ! "posting list". The TIDs in a posting list must appear in sorted order. ! If the list would be too big for the index tuple to fit on an index page, ! the ItemPointers are pushed out to a separate posting page or pages, and ! none appear in the key entry itself. The separate pages are called a ! "posting tree"; they are organized as a btree of ItemPointer values. ! Note that in either case, the ItemPointers associated with a key can ! easily be read out in sorted order; this is relied on by the scan ! algorithms. ! ! * The index tuple header fields of a leaf key entry are abused as follows: ! ! 1) Posting list case: ! ! * ItemPointerGetBlockNumber(&itup->t_tid) contains the offset from index ! tuple start to the posting list. ! Access macros: GinGetPostingOffset(itup) / GinSetPostingOffset(itup,n) ! ! * ItemPointerGetOffsetNumber(&itup->t_tid) contains the number of elements ! in the posting list (number of heap itempointers). ! Access macros: GinGetNPosting(itup) / GinSetNPosting(itup,n) ! ! * If IndexTupleHasNulls(itup) is true, the null category byte can be ! accessed/set with GinGetNullCategory(itup,gs) / GinSetNullCategory(itup,gs,c) ! ! * The posting list can be accessed with GinGetPosting(itup) ! ! 2) Posting tree case: ! ! * ItemPointerGetBlockNumber(&itup->t_tid) contains the index block number ! of the root of the posting tree. ! Access macros: GinGetPostingTree(itup) / GinSetPostingTree(itup, blkno) ! ! * ItemPointerGetOffsetNumber(&itup->t_tid) contains the magic number ! GIN_TREE_POSTING, which distinguishes this from the posting-list case ! (it's large enough that that many heap itempointers couldn't possibly ! fit on an index page). This value is inserted automatically by the ! GinSetPostingTree macro. ! ! * If IndexTupleHasNulls(itup) is true, the null category byte can be ! accessed/set with GinGetNullCategory(itup) / GinSetNullCategory(itup,c) ! ! * The posting list is not present and must not be accessed. ! ! Use the macro GinIsPostingTree(itup) to determine which case applies. ! ! In both cases, itup->t_info & INDEX_SIZE_MASK contains actual total size of ! tuple, and the INDEX_VAR_MASK and INDEX_NULL_MASK bits have their normal ! meanings as set by index_form_tuple. ! ! Index tuples in non-leaf levels of the btree contain the optional column ! number, key datum, and null category byte as above. They do not contain ! a posting list. ItemPointerGetBlockNumber(&itup->t_tid) is the downlink ! to the next lower btree level, and ItemPointerGetOffsetNumber(&itup->t_tid) ! is InvalidOffsetNumber. Use the access macros GinGetDownlink/GinSetDownlink ! to get/set the downlink. ! ! Index entries that appear in "pending list" pages work a tad differently as ! well. The optional column number, key datum, and null category byte are as ! for other GIN index entries. However, there is always exactly one heap ! itempointer associated with a pending entry, and it is stored in the t_tid ! header field just as in non-GIN indexes. There is no posting list. ! Furthermore, the code that searches the pending list assumes that all ! entries for a given heap tuple appear consecutively in the pending list and ! are sorted by the column-number-plus-key-datum. The GIN_LIST_FULLROW page ! flag bit tells whether entries for a given heap tuple are spread across ! multiple pending-list pages. If GIN_LIST_FULLROW is set, the page contains ! all the entries for one or more heap tuples. If GIN_LIST_FULLROW is clear, ! the page contains entries for only one heap tuple, *and* they are not all ! the entries for that tuple. (Thus, a heap tuple whose entries do not all ! fit on one pending-list page must have those pages to itself, even if this ! results in wasting much of the space on the preceding page and the last ! page for the tuple.) ! ! Limitations ! ----------- ! ! * Gin doesn't use scan->kill_prior_tuple & scan->ignore_killed_tuples ! * Gin searches entries only by equality matching, or simple range ! matching using the "partial match" feature. ! * Zero-key queries require a full index scan which is quite inefficient. TODO ---- Nearest future: ! * Opclasses for more types (no programming, just many catalog changes) Distant future: * Replace B-tree of entries to something like GiST Authors ------- ! Original work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov (oleg@sai.msu.su). diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index ec038aac4b065724f7785806681c5cf422904b3b..a2a4c5867f2c7c9c8716ab42a6bb8496d2ad3793 100644 *** a/src/backend/access/gin/ginbtree.c --- b/src/backend/access/gin/ginbtree.c *************** ginFindLeafPage(GinBtree btree, GinBtree *** 104,110 **** * ok, page is correctly locked, we should check to move right .., * root never has a right link, so small optimization */ ! while (btree->fullScan == FALSE && stack->blkno != rootBlkno && btree->isMoveRight(btree, page)) { BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; --- 104,111 ---- * ok, page is correctly locked, we should check to move right .., * root never has a right link, so small optimization */ ! while (btree->fullScan == FALSE && stack->blkno != rootBlkno && ! btree->isMoveRight(btree, page)) { BlockNumber rightlink = GinPageGetOpaque(page)->rightlink; *************** ginFindParents(GinBtree btree, GinBtreeS *** 226,232 **** LockBuffer(root->buffer, GIN_UNLOCK); Assert(blkno != InvalidBlockNumber); - for (;;) { buffer = ReadBuffer(btree->index, blkno); --- 227,232 ---- diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index a4ee3364e10a91bbd53b2ad74bae336287226e0c..20c6152d443377a1e3048d60a10d6b0bbed38a27 100644 *** a/src/backend/access/gin/ginbulk.c --- b/src/backend/access/gin/ginbulk.c *************** *** 19,25 **** #include "utils/memutils.h" ! #define DEF_NENTRY 2048 /* EntryAccumulator allocation quantum */ #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ --- 19,25 ---- #include "utils/memutils.h" ! #define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */ #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ *************** *** 27,74 **** static void ginCombineData(RBNode *existing, const RBNode *newdata, void *arg) { ! EntryAccumulator *eo = (EntryAccumulator *) existing; ! const EntryAccumulator *en = (const EntryAccumulator *) newdata; BuildAccumulator *accum = (BuildAccumulator *) arg; /* * Note this code assumes that newdata contains only one itempointer. */ ! if (eo->number >= eo->length) { accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); ! eo->length *= 2; ! eo->list = (ItemPointerData *) repalloc(eo->list, ! sizeof(ItemPointerData) * eo->length); accum->allocatedMemory += GetMemoryChunkSpace(eo->list); } ! /* If item pointers are not ordered, they will need to be sorted. */ if (eo->shouldSort == FALSE) { int res; ! res = ginCompareItemPointers(eo->list + eo->number - 1, en->list); Assert(res != 0); if (res > 0) eo->shouldSort = TRUE; } ! eo->list[eo->number] = en->list[0]; ! eo->number++; } /* Comparator function for rbtree.c */ static int cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg) { ! const EntryAccumulator *ea = (const EntryAccumulator *) a; ! const EntryAccumulator *eb = (const EntryAccumulator *) b; BuildAccumulator *accum = (BuildAccumulator *) arg; ! return ginCompareAttEntries(accum->ginstate, ea->attnum, ea->value, ! eb->attnum, eb->value); } /* Allocator function for rbtree.c */ --- 27,75 ---- static void ginCombineData(RBNode *existing, const RBNode *newdata, void *arg) { ! GinEntryAccumulator *eo = (GinEntryAccumulator *) existing; ! const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata; BuildAccumulator *accum = (BuildAccumulator *) arg; /* * Note this code assumes that newdata contains only one itempointer. */ ! if (eo->count >= eo->maxcount) { accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); ! eo->maxcount *= 2; ! eo->list = (ItemPointerData *) ! repalloc(eo->list, sizeof(ItemPointerData) * eo->maxcount); accum->allocatedMemory += GetMemoryChunkSpace(eo->list); } ! /* If item pointers are not ordered, they will need to be sorted later */ if (eo->shouldSort == FALSE) { int res; ! res = ginCompareItemPointers(eo->list + eo->count - 1, en->list); Assert(res != 0); if (res > 0) eo->shouldSort = TRUE; } ! eo->list[eo->count] = en->list[0]; ! eo->count++; } /* Comparator function for rbtree.c */ static int cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg) { ! const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a; ! const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b; BuildAccumulator *accum = (BuildAccumulator *) arg; ! return ginCompareAttEntries(accum->ginstate, ! ea->attnum, ea->key, ea->category, ! eb->attnum, eb->key, eb->category); } /* Allocator function for rbtree.c */ *************** static RBNode * *** 76,97 **** ginAllocEntryAccumulator(void *arg) { BuildAccumulator *accum = (BuildAccumulator *) arg; ! EntryAccumulator *ea; /* * Allocate memory by rather big chunks to decrease overhead. We have * no need to reclaim RBNodes individually, so this costs nothing. */ ! if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY) { ! accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY); accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); ! accum->length = 0; } /* Allocate new RBNode from current chunk */ ! ea = accum->entryallocator + accum->length; ! accum->length++; return (RBNode *) ea; } --- 77,98 ---- ginAllocEntryAccumulator(void *arg) { BuildAccumulator *accum = (BuildAccumulator *) arg; ! GinEntryAccumulator *ea; /* * Allocate memory by rather big chunks to decrease overhead. We have * no need to reclaim RBNodes individually, so this costs nothing. */ ! if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY) { ! accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY); accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); ! accum->eas_used = 0; } /* Allocate new RBNode from current chunk */ ! ea = accum->entryallocator + accum->eas_used; ! accum->eas_used++; return (RBNode *) ea; } *************** ginAllocEntryAccumulator(void *arg) *** 99,108 **** void ginInitBA(BuildAccumulator *accum) { accum->allocatedMemory = 0; - accum->length = 0; accum->entryallocator = NULL; ! accum->tree = rb_create(sizeof(EntryAccumulator), cmpEntryAccumulator, ginCombineData, ginAllocEntryAccumulator, --- 100,110 ---- void ginInitBA(BuildAccumulator *accum) { + /* accum->ginstate is intentionally not set here */ accum->allocatedMemory = 0; accum->entryallocator = NULL; ! accum->eas_used = 0; ! accum->tree = rb_create(sizeof(GinEntryAccumulator), cmpEntryAccumulator, ginCombineData, ginAllocEntryAccumulator, *************** ginInitBA(BuildAccumulator *accum) *** 111,118 **** } /* ! * This is basically the same as datumCopy(), but modified to count ! * palloc'd space in accum. */ static Datum getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) --- 113,120 ---- } /* ! * This is basically the same as datumCopy(), but extended to count ! * palloc'd space in accum->allocatedMemory. */ static Datum getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) *************** getDatumCopy(BuildAccumulator *accum, Of *** 134,165 **** * Find/store one entry from indexed value. */ static void ! ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry) { ! EntryAccumulator key; ! EntryAccumulator *ea; bool isNew; /* ! * For the moment, fill only the fields of key that will be looked at * by cmpEntryAccumulator or ginCombineData. */ ! key.attnum = attnum; ! key.value = entry; /* temporarily set up single-entry itempointer list */ ! key.list = heapptr; ! ea = (EntryAccumulator *) rb_insert(accum->tree, (RBNode *) &key, &isNew); if (isNew) { /* * Finish initializing new tree entry, including making permanent ! * copies of the datum and itempointer. */ ! ea->value = getDatumCopy(accum, attnum, entry); ! ea->length = DEF_NPTR; ! ea->number = 1; ea->shouldSort = FALSE; ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); --- 136,172 ---- * Find/store one entry from indexed value. */ static void ! ginInsertBAEntry(BuildAccumulator *accum, ! ItemPointer heapptr, OffsetNumber attnum, ! Datum key, GinNullCategory category) { ! GinEntryAccumulator eatmp; ! GinEntryAccumulator *ea; bool isNew; /* ! * For the moment, fill only the fields of eatmp that will be looked at * by cmpEntryAccumulator or ginCombineData. */ ! eatmp.attnum = attnum; ! eatmp.key = key; ! eatmp.category = category; /* temporarily set up single-entry itempointer list */ ! eatmp.list = heapptr; ! ea = (GinEntryAccumulator *) rb_insert(accum->tree, (RBNode *) &eatmp, ! &isNew); if (isNew) { /* * Finish initializing new tree entry, including making permanent ! * copies of the datum (if it's not null) and itempointer. */ ! if (category == GIN_CAT_NORM_KEY) ! ea->key = getDatumCopy(accum, attnum, key); ! ea->maxcount = DEF_NPTR; ! ea->count = 1; ea->shouldSort = FALSE; ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); *************** ginInsertEntry(BuildAccumulator *accum, *** 175,181 **** } /* ! * Insert one heap pointer. * * Since the entries are being inserted into a balanced binary tree, you * might think that the order of insertion wouldn't be critical, but it turns --- 182,188 ---- } /* ! * Insert the entries for one heap pointer. * * Since the entries are being inserted into a balanced binary tree, you * might think that the order of insertion wouldn't be critical, but it turns *************** ginInsertEntry(BuildAccumulator *accum, *** 187,208 **** * We do this as follows. First, we imagine that we have an array whose size * is the smallest power of two greater than or equal to the actual array * size. Second, we insert the middle entry of our virtual array into the ! * tree; then, we insert the middles of each half of out virtual array, then * middles of quarters, etc. */ void ! ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, ! Datum *entries, int32 nentry) { ! uint32 step = nentry; ! if (nentry <= 0) return; Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); /* ! * step will contain largest power of 2 and <= nentry */ step |= (step >> 1); step |= (step >> 2); --- 194,217 ---- * We do this as follows. First, we imagine that we have an array whose size * is the smallest power of two greater than or equal to the actual array * size. Second, we insert the middle entry of our virtual array into the ! * tree; then, we insert the middles of each half of our virtual array, then * middles of quarters, etc. */ void ! ginInsertBAEntries(BuildAccumulator *accum, ! ItemPointer heapptr, OffsetNumber attnum, ! Datum *entries, GinNullCategory *categories, ! int32 nentries) { ! uint32 step = nentries; ! if (nentries <= 0) return; Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); /* ! * step will contain largest power of 2 and <= nentries */ step |= (step >> 1); step |= (step >> 2); *************** ginInsertRecordBA(BuildAccumulator *accu *** 216,223 **** { int i; ! for (i = step - 1; i < nentry && i >= 0; i += step << 1 /* *2 */ ) ! ginInsertEntry(accum, heapptr, attnum, entries[i]); step >>= 1; /* /2 */ } --- 225,233 ---- { int i; ! for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ ) ! ginInsertBAEntry(accum, heapptr, attnum, ! entries[i], categories[i]); step >>= 1; /* /2 */ } *************** qsortCompareItemPointers(const void *a, *** 228,264 **** { int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b); Assert(res != 0); return res; } ! /* Prepare to read out the rbtree contents using ginGetEntry */ void ginBeginBAScan(BuildAccumulator *accum) { rb_begin_iterate(accum->tree, LeftRightWalk); } ItemPointerData * ! ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n) { ! EntryAccumulator *entry; ItemPointerData *list; ! entry = (EntryAccumulator *) rb_iterate(accum->tree); if (entry == NULL) ! return NULL; - *n = entry->number; *attnum = entry->attnum; ! *value = entry->value; list = entry->list; ! Assert(list != NULL); ! if (entry->shouldSort && entry->number > 1) ! qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers); return list; } --- 238,284 ---- { int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b); + /* Assert that there are no equal item pointers being sorted */ Assert(res != 0); return res; } ! /* Prepare to read out the rbtree contents using ginGetBAEntry */ void ginBeginBAScan(BuildAccumulator *accum) { rb_begin_iterate(accum->tree, LeftRightWalk); } + /* + * Get the next entry in sequence from the BuildAccumulator's rbtree. + * This consists of a single key datum and a list (array) of one or more + * heap TIDs in which that key is found. The list is guaranteed sorted. + */ ItemPointerData * ! ginGetBAEntry(BuildAccumulator *accum, ! OffsetNumber *attnum, Datum *key, GinNullCategory *category, ! uint32 *n) { ! GinEntryAccumulator *entry; ItemPointerData *list; ! entry = (GinEntryAccumulator *) rb_iterate(accum->tree); if (entry == NULL) ! return NULL; /* no more entries */ *attnum = entry->attnum; ! *key = entry->key; ! *category = entry->category; list = entry->list; + *n = entry->count; ! Assert(list != NULL && entry->count > 0); ! if (entry->shouldSort && entry->count > 1) ! qsort(list, entry->count, sizeof(ItemPointerData), ! qsortCompareItemPointers); return list; } diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index f6e86da8db87a31ec9b037ac0729162433d40a10..841b03128af46641174336481de7da8500924ac5 100644 *** a/src/backend/access/gin/gindatapage.c --- b/src/backend/access/gin/gindatapage.c *************** *** 21,34 **** int ginCompareItemPointers(ItemPointer a, ItemPointer b) { ! if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b)) { ! if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b)) return 0; ! return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1; } ! return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1; } /* --- 21,40 ---- int ginCompareItemPointers(ItemPointer a, ItemPointer b) { ! BlockNumber ba = GinItemPointerGetBlockNumber(a); ! BlockNumber bb = GinItemPointerGetBlockNumber(b); ! ! if (ba == bb) { ! OffsetNumber oa = GinItemPointerGetOffsetNumber(a); ! OffsetNumber ob = GinItemPointerGetOffsetNumber(b); ! ! if (oa == ob) return 0; ! return (oa > ob) ? 1 : -1; } ! return (ba > bb) ? 1 : -1; } /* *************** dataLocateItem(GinBtree btree, GinBtreeS *** 122,133 **** pitem = (PostingItem *) GinDataPageGetItem(page, mid); if (mid == maxoff) ! /* * Right infinity, page already correctly chosen with a help of * dataIsMoveRight */ result = -1; else { pitem = (PostingItem *) GinDataPageGetItem(page, mid); --- 128,140 ---- pitem = (PostingItem *) GinDataPageGetItem(page, mid); if (mid == maxoff) ! { /* * Right infinity, page already correctly chosen with a help of * dataIsMoveRight */ result = -1; + } else { pitem = (PostingItem *) GinDataPageGetItem(page, mid); *************** dataFindChildPtr(GinBtree btree, Page pa *** 220,226 **** Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); ! /* if page isn't changed, we returns storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); --- 227,233 ---- Assert(!GinPageIsLeaf(page)); Assert(GinPageIsData(page)); ! /* if page isn't changed, we return storedOff */ if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { pitem = (PostingItem *) GinDataPageGetItem(page, storedOff); *************** GinDataPageAddItem(Page page, void *data *** 286,294 **** { ptr = GinDataPageGetItem(page, offset); if (maxoff + 1 - offset != 0) ! memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page)); } ! memcpy(ptr, data, GinSizeOfItem(page)); GinPageGetOpaque(page)->maxoff++; } --- 293,303 ---- { ptr = GinDataPageGetItem(page, offset); if (maxoff + 1 - offset != 0) ! memmove(ptr + GinSizeOfDataPageItem(page), ! ptr, ! (maxoff - offset + 1) * GinSizeOfDataPageItem(page)); } ! memcpy(ptr, data, GinSizeOfDataPageItem(page)); GinPageGetOpaque(page)->maxoff++; } *************** static void *** 372,381 **** dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { Page page = BufferGetPage(buf); static XLogRecData rdata[3]; - int sizeofitem = GinSizeOfItem(page); static ginxlogInsert data; - int cnt = 0; *prdata = rdata; Assert(GinPageIsData(page)); --- 381,391 ---- dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { Page page = BufferGetPage(buf); + int sizeofitem = GinSizeOfDataPageItem(page); + int cnt = 0; + /* these must be static so they can be returned to caller */ static XLogRecData rdata[3]; static ginxlogInsert data; *prdata = rdata; Assert(GinPageIsData(page)); *************** dataPlaceToPage(GinBtree btree, Buffer b *** 453,472 **** static Page dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { - static ginxlogSplit data; - static XLogRecData rdata[4]; - static char vector[2 * BLCKSZ]; char *ptr; OffsetNumber separator; ItemPointer bound; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); ItemPointerData oldbound = *GinDataPageGetRightBound(lpage); ! int sizeofitem = GinSizeOfItem(lpage); OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff; Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); Size freeSpace; uint32 nCopied = 1; GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); freeSpace = GinDataPageGetFreeSpace(rpage); --- 463,483 ---- static Page dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { char *ptr; OffsetNumber separator; ItemPointer bound; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); ItemPointerData oldbound = *GinDataPageGetRightBound(lpage); ! int sizeofitem = GinSizeOfDataPageItem(lpage); OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff; Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); Size freeSpace; uint32 nCopied = 1; + /* these must be static so they can be returned to caller */ + static ginxlogSplit data; + static XLogRecData rdata[4]; + static char vector[2 * BLCKSZ]; GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); freeSpace = GinDataPageGetFreeSpace(rpage); *************** dataSplitPage(GinBtree btree, Buffer lbu *** 482,490 **** if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff) { nCopied = 0; ! while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData))) { ! memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem, sizeof(ItemPointerData)); maxoff++; nCopied++; --- 493,503 ---- if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff) { nCopied = 0; ! while (btree->curitem < btree->nitem && ! maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData))) { ! memcpy(vector + maxoff * sizeof(ItemPointerData), ! btree->items + btree->curitem, sizeof(ItemPointerData)); maxoff++; nCopied++; *************** ginPrepareScanPostingTree(Relation index *** 631,639 **** * Inserts array of item pointers, may execute several tree scan (very rare) */ void ! ginInsertItemPointer(GinPostingTreeScan *gdi, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats) { BlockNumber rootBlkno = gdi->stack->blkno; --- 644,652 ---- * Inserts array of item pointers, may execute several tree scan (very rare) */ void ! ginInsertItemPointers(GinPostingTreeScan *gdi, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats) { BlockNumber rootBlkno = gdi->stack->blkno; diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index e56034202ad728cda6cbf784592f20274165ad97..dea2ab9e55098e90cf46cbe8c8990d9779dfa475 100644 *** a/src/backend/access/gin/ginentrypage.c --- b/src/backend/access/gin/ginentrypage.c *************** *** 24,130 **** * If the tuple would be too big to be stored, function throws a suitable * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE. * ! * On leaf pages, Index tuple has non-traditional layout. Tuple may contain ! * posting list or root blocknumber of posting tree. ! * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno) ! * 1) Posting list ! * - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual ! * - ItemPointerGetBlockNumber(&itup->t_tid) contains original ! * size of tuple (without posting list). ! * Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) ! * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number ! * of elements in posting list (number of heap itempointers) ! * Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n) ! * - After standard part of tuple there is a posting list, ie, array ! * of heap itempointers ! * Macros: GinGetPosting(itup) ! * 2) Posting tree ! * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual ! * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of ! * root of posting tree ! * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number ! * GIN_TREE_POSTING, which distinguishes this from posting-list case ! * ! * Attributes of an index tuple are different for single and multicolumn index. ! * For single-column case, index tuple stores only value to be indexed. ! * For multicolumn case, it stores two attributes: column number of value ! * and value. */ IndexTuple ! GinFormTuple(Relation index, GinState *ginstate, ! OffsetNumber attnum, Datum key, ! ItemPointerData *ipd, uint32 nipd, bool errorTooBig) { ! bool isnull[2] = {FALSE, FALSE}; IndexTuple itup; uint32 newsize; if (ginstate->oneCol) ! itup = index_form_tuple(ginstate->origTupdesc, &key, isnull); else { - Datum datums[2]; - datums[0] = UInt16GetDatum(attnum); datums[1] = key; ! itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); } ! GinSetOrigSizePosting(itup, IndexTupleSize(itup)); ! if (nipd > 0) { ! newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd); ! if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize)) ! { ! if (errorTooBig) ! ereport(ERROR, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", ! (unsigned long) newsize, ! (unsigned long) Min(INDEX_SIZE_MASK, ! GinMaxItemSize), ! RelationGetRelationName(index)))); ! return NULL; ! } itup = repalloc(itup, newsize); ! /* set new size */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; - - if (ipd) - memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd); - GinSetNPosting(itup, nipd); } - else - { - /* - * Gin tuple without any ItemPointers should be large enough to keep - * one ItemPointer, to prevent inconsistency between - * ginHeapTupleFastCollect and ginEntryInsert called by - * ginHeapTupleInsert. ginHeapTupleFastCollect forms tuple without - * extra pointer to heap, but ginEntryInsert (called for pending list - * cleanup during vacuum) will form the same tuple with one - * ItemPointer. - */ - newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)); - if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize)) - { - if (errorTooBig) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", - (unsigned long) newsize, - (unsigned long) Min(INDEX_SIZE_MASK, - GinMaxItemSize), - RelationGetRelationName(index)))); - return NULL; - } ! GinSetNPosting(itup, 0); } return itup; } --- 24,139 ---- * If the tuple would be too big to be stored, function throws a suitable * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE. * ! * See src/backend/access/gin/README for a description of the index tuple ! * format that is being built here. We build on the assumption that we ! * are making a leaf-level key entry containing a posting list of nipd items. ! * If the caller is actually trying to make a posting-tree entry, non-leaf ! * entry, or pending-list entry, it should pass nipd = 0 and then overwrite ! * the t_tid fields as necessary. In any case, ipd can be NULL to skip ! * copying any itempointers into the posting list; the caller is responsible ! * for filling the posting list afterwards, if ipd = NULL and nipd > 0. */ IndexTuple ! GinFormTuple(GinState *ginstate, ! OffsetNumber attnum, Datum key, GinNullCategory category, ! ItemPointerData *ipd, uint32 nipd, ! bool errorTooBig) { ! Datum datums[2]; ! bool isnull[2]; IndexTuple itup; uint32 newsize; + /* Build the basic tuple: optional column number, plus key datum */ if (ginstate->oneCol) ! { ! datums[0] = key; ! isnull[0] = (category != GIN_CAT_NORM_KEY); ! } else { datums[0] = UInt16GetDatum(attnum); + isnull[0] = false; datums[1] = key; ! isnull[1] = (category != GIN_CAT_NORM_KEY); } ! itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); ! /* ! * Determine and store offset to the posting list, making sure there is ! * room for the category byte if needed. ! * ! * Note: because index_form_tuple MAXALIGNs the tuple size, there may well ! * be some wasted pad space. Is it worth recomputing the data length to ! * prevent that? That would also allow us to Assert that the real data ! * doesn't overlap the GinNullCategory byte, which this code currently ! * takes on faith. ! */ ! newsize = IndexTupleSize(itup); ! ! if (IndexTupleHasNulls(itup)) { ! uint32 minsize; ! ! Assert(category != GIN_CAT_NORM_KEY); ! minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory); ! newsize = Max(newsize, minsize); ! } ! ! newsize = SHORTALIGN(newsize); ! ! GinSetPostingOffset(itup, newsize); + GinSetNPosting(itup, nipd); + + /* + * Add space needed for posting list, if any. Then check that the tuple + * won't be too big to store. + */ + newsize += sizeof(ItemPointerData) * nipd; + newsize = MAXALIGN(newsize); + if (newsize > Min(INDEX_SIZE_MASK, GinMaxItemSize)) + { + if (errorTooBig) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("index row size %lu exceeds maximum %lu for index \"%s\"", + (unsigned long) newsize, + (unsigned long) Min(INDEX_SIZE_MASK, + GinMaxItemSize), + RelationGetRelationName(ginstate->index)))); + pfree(itup); + return NULL; + } + + /* + * Resize tuple if needed + */ + if (newsize != IndexTupleSize(itup)) + { itup = repalloc(itup, newsize); ! /* set new size in tuple header */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; } ! /* ! * Insert category byte, if needed ! */ ! if (category != GIN_CAT_NORM_KEY) ! { ! Assert(IndexTupleHasNulls(itup)); ! GinSetNullCategory(itup, ginstate, category); } + + /* + * Copy in the posting list, if provided + */ + if (ipd) + memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd); + return itup; } *************** GinShortenTuple(IndexTuple itup, uint32 *** 140,146 **** Assert(nipd <= GinGetNPosting(itup)); ! newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd); Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK)); --- 149,156 ---- Assert(nipd <= GinGetNPosting(itup)); ! newsize = GinGetPostingOffset(itup) + sizeof(ItemPointerData) * nipd; ! newsize = MAXALIGN(newsize); Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK)); *************** GinShortenTuple(IndexTuple itup, uint32 *** 151,158 **** } /* * Entry tree is a "static", ie tuple never deletes from it, ! * so we don't use right bound, we use rightest key instead. */ static IndexTuple getRightMostTuple(Page page) --- 161,205 ---- } /* + * Form a non-leaf entry tuple by copying the key data from the given tuple, + * which can be either a leaf or non-leaf entry tuple. + * + * Any posting list in the source tuple is not copied. The specified child + * block number is inserted into t_tid. + */ + static IndexTuple + GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk) + { + IndexTuple nitup; + + if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) + { + /* Tuple contains a posting list, just copy stuff before that */ + uint32 origsize = GinGetPostingOffset(itup); + + origsize = MAXALIGN(origsize); + nitup = (IndexTuple) palloc(origsize); + memcpy(nitup, itup, origsize); + /* ... be sure to fix the size header field ... */ + nitup->t_info &= ~INDEX_SIZE_MASK; + nitup->t_info |= origsize; + } + else + { + /* Copy the tuple as-is */ + nitup = (IndexTuple) palloc(IndexTupleSize(itup)); + memcpy(nitup, itup, IndexTupleSize(itup)); + } + + /* Now insert the correct downlink */ + GinSetDownlink(nitup, childblk); + + return nitup; + } + + /* * Entry tree is a "static", ie tuple never deletes from it, ! * so we don't use right bound, we use rightmost key instead. */ static IndexTuple getRightMostTuple(Page page) *************** static bool *** 166,181 **** entryIsMoveRight(GinBtree btree, Page page) { IndexTuple itup; if (GinPageRightMost(page)) return FALSE; itup = getRightMostTuple(page); if (ginCompareAttEntries(btree->ginstate, ! btree->entryAttnum, btree->entryValue, ! gintuple_get_attrnum(btree->ginstate, itup), ! gin_index_getattr(btree->ginstate, itup)) > 0) return TRUE; return FALSE; --- 213,232 ---- entryIsMoveRight(GinBtree btree, Page page) { IndexTuple itup; + OffsetNumber attnum; + Datum key; + GinNullCategory category; if (GinPageRightMost(page)) return FALSE; itup = getRightMostTuple(page); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); if (ginCompareAttEntries(btree->ginstate, ! btree->entryAttnum, btree->entryKey, btree->entryCategory, ! attnum, key, category) > 0) return TRUE; return FALSE; *************** entryIsMoveRight(GinBtree btree, Page pa *** 183,189 **** /* * Find correct tuple in non-leaf page. It supposed that ! * page correctly choosen and searching value SHOULD be on page */ static BlockNumber entryLocateEntry(GinBtree btree, GinBtreeStack *stack) --- 234,240 ---- /* * Find correct tuple in non-leaf page. It supposed that ! * page correctly chosen and searching value SHOULD be on page */ static BlockNumber entryLocateEntry(GinBtree btree, GinBtreeStack *stack) *************** entryLocateEntry(GinBtree btree, GinBtre *** 216,238 **** OffsetNumber mid = low + ((high - low) / 2); if (mid == maxoff && GinPageRightMost(page)) /* Right infinity */ result = -1; else { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, ! btree->entryValue, ! gintuple_get_attrnum(btree->ginstate, itup), ! gin_index_getattr(btree->ginstate, itup)); } if (result == 0) { stack->off = mid; ! Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO); ! return GinItemPointerGetBlockNumber(&(itup)->t_tid); } else if (result > 0) low = mid + 1; --- 267,297 ---- OffsetNumber mid = low + ((high - low) / 2); if (mid == maxoff && GinPageRightMost(page)) + { /* Right infinity */ result = -1; + } else { + OffsetNumber attnum; + Datum key; + GinNullCategory category; + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, ! btree->entryKey, ! btree->entryCategory, ! attnum, key, category); } if (result == 0) { stack->off = mid; ! Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); ! return GinGetDownlink(itup); } else if (result > 0) low = mid + 1; *************** entryLocateEntry(GinBtree btree, GinBtre *** 244,256 **** stack->off = high; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); ! Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO); ! return GinItemPointerGetBlockNumber(&(itup)->t_tid); } /* * Searches correct position for value on leaf page. ! * Page should be corrrectly choosen. * Returns true if value found on page. */ static bool --- 303,315 ---- stack->off = high; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); ! Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); ! return GinGetDownlink(itup); } /* * Searches correct position for value on leaf page. ! * Page should be correctly chosen. * Returns true if value found on page. */ static bool *************** entryLocateLeafEntry(GinBtree btree, Gin *** 259,265 **** Page page = BufferGetPage(stack->buffer); OffsetNumber low, high; - IndexTuple itup; Assert(GinPageIsLeaf(page)); Assert(!GinPageIsData(page)); --- 318,323 ---- *************** entryLocateLeafEntry(GinBtree btree, Gin *** 284,297 **** while (high > low) { OffsetNumber mid = low + ((high - low) / 2); int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, ! btree->entryValue, ! gintuple_get_attrnum(btree->ginstate, itup), ! gin_index_getattr(btree->ginstate, itup)); if (result == 0) { stack->off = mid; --- 342,361 ---- while (high > low) { OffsetNumber mid = low + ((high - low) / 2); + IndexTuple itup; + OffsetNumber attnum; + Datum key; + GinNullCategory category; int result; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); + attnum = gintuple_get_attrnum(btree->ginstate, itup); + key = gintuple_get_key(btree->ginstate, itup, &category); result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, ! btree->entryKey, ! btree->entryCategory, ! attnum, key, category); if (result == 0) { stack->off = mid; *************** entryFindChildPtr(GinBtree btree, Page p *** 321,327 **** if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); ! if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno) return storedOff; /* --- 385,391 ---- if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); ! if (GinGetDownlink(itup) == blkno) return storedOff; /* *************** entryFindChildPtr(GinBtree btree, Page p *** 331,337 **** for (i = storedOff + 1; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); ! if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno) return i; } maxoff = storedOff - 1; --- 395,401 ---- for (i = storedOff + 1; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); ! if (GinGetDownlink(itup) == blkno) return i; } maxoff = storedOff - 1; *************** entryFindChildPtr(GinBtree btree, Page p *** 341,347 **** for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); ! if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno) return i; } --- 405,411 ---- for (i = FirstOffsetNumber; i <= maxoff; i++) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); ! if (GinGetDownlink(itup) == blkno) return i; } *************** entryGetLeftMostPage(GinBtree btree, Pag *** 358,364 **** Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); ! return GinItemPointerGetBlockNumber(&(itup)->t_tid); } static bool --- 422,428 ---- Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); ! return GinGetDownlink(itup); } static bool *************** entryPreparePage(GinBtree btree, Page pa *** 406,412 **** { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ! ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber); ret = btree->rightblkno; } --- 470,476 ---- { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ! GinSetDownlink(itup, btree->rightblkno); ret = btree->rightblkno; } *************** static void *** 422,431 **** entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { Page page = BufferGetPage(buf); - static XLogRecData rdata[3]; OffsetNumber placed; - static ginxlogInsert data; int cnt = 0; *prdata = rdata; data.updateBlkno = entryPreparePage(btree, page, off); --- 486,496 ---- entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { Page page = BufferGetPage(buf); OffsetNumber placed; int cnt = 0; + /* these must be static so they can be returned to caller */ + static XLogRecData rdata[3]; + static ginxlogInsert data; *prdata = rdata; data.updateBlkno = entryPreparePage(btree, page, off); *************** entryPlaceToPage(GinBtree btree, Buffer *** 475,505 **** } /* - * Returns new tuple with copied value from source tuple. - * New tuple will not store posting list - */ - static IndexTuple - copyIndexTuple(IndexTuple itup, Page page) - { - IndexTuple nitup; - - if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) - { - nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup))); - memcpy(nitup, itup, GinGetOrigSizePosting(itup)); - nitup->t_info &= ~INDEX_SIZE_MASK; - nitup->t_info |= GinGetOrigSizePosting(itup); - } - else - { - nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup))); - memcpy(nitup, itup, IndexTupleSize(itup)); - } - - return nitup; - } - - /* * Place tuple and split page, original buffer(lbuf) leaves untouched, * returns shadow page of lbuf filled new data. * Tuples are distributed between pages by equal size on its, not --- 540,545 ---- *************** copyIndexTuple(IndexTuple itup, Page pag *** 508,533 **** static Page entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { - static XLogRecData rdata[2]; OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; - static char tupstore[2 * BLCKSZ]; char *ptr; IndexTuple itup, leftrightmost = NULL; - static ginxlogSplit data; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); *prdata = rdata; data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? ! InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid)); data.updateBlkno = entryPreparePage(btree, lpage, off); maxoff = PageGetMaxOffsetNumber(lpage); --- 548,574 ---- static Page entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { OffsetNumber i, maxoff, separator = InvalidOffsetNumber; Size totalsize = 0; Size lsize = 0, size; char *ptr; IndexTuple itup, leftrightmost = NULL; Page page; Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf)); Page rpage = BufferGetPage(rbuf); Size pageSize = PageGetPageSize(lpage); + /* these must be static so they can be returned to caller */ + static XLogRecData rdata[2]; + static ginxlogSplit data; + static char tupstore[2 * BLCKSZ]; *prdata = rdata; data.leftChildBlkno = (GinPageIsLeaf(lpage)) ? ! InvalidOffsetNumber : GinGetDownlink(btree->entry); data.updateBlkno = entryPreparePage(btree, lpage, off); maxoff = PageGetMaxOffsetNumber(lpage); *************** entrySplitPage(GinBtree btree, Buffer lb *** 588,595 **** ptr += MAXALIGN(IndexTupleSize(itup)); } ! btree->entry = copyIndexTuple(leftrightmost, lpage); ! ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber); btree->rightblkno = BufferGetBlockNumber(rbuf); --- 629,636 ---- ptr += MAXALIGN(IndexTupleSize(itup)); } ! btree->entry = GinFormInteriorTuple(leftrightmost, lpage, ! BufferGetBlockNumber(lbuf)); btree->rightblkno = BufferGetBlockNumber(rbuf); *************** ginPageGetLinkItup(Buffer buf) *** 627,634 **** Page page = BufferGetPage(buf); itup = getRightMostTuple(page); ! nitup = copyIndexTuple(itup, page); ! ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber); return nitup; } --- 668,674 ---- Page page = BufferGetPage(buf); itup = getRightMostTuple(page); ! nitup = GinFormInteriorTuple(itup, page, BufferGetBlockNumber(buf)); return nitup; } *************** ginEntryFillRoot(GinBtree btree, Buffer *** 656,667 **** pfree(itup); } void ! ginPrepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum value, GinState *ginstate) { memset(btree, 0, sizeof(GinBtreeData)); ! btree->index = index; btree->ginstate = ginstate; btree->findChildPage = entryLocateEntry; --- 696,715 ---- pfree(itup); } + /* + * Set up GinBtree for entry page access + * + * Note: during WAL recovery, there may be no valid data in ginstate + * other than a faked-up Relation pointer; the key datum is bogus too. + */ void ! ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, ! Datum key, GinNullCategory category, ! GinState *ginstate) { memset(btree, 0, sizeof(GinBtreeData)); ! btree->index = ginstate->index; btree->ginstate = ginstate; btree->findChildPage = entryLocateEntry; *************** ginPrepareEntryScan(GinBtree btree, Rela *** 680,685 **** btree->isBuild = FALSE; btree->entryAttnum = attnum; ! btree->entryValue = value; btree->isDelete = FALSE; } --- 728,734 ---- btree->isBuild = FALSE; btree->entryAttnum = attnum; ! btree->entryKey = key; ! btree->entryCategory = category; btree->isDelete = FALSE; } diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 3941f7eb007365300fafb8889281041be1887104..19b56766c39b8be356fa8cf8cd8a6f58709acc74 100644 *** a/src/backend/access/gin/ginfast.c --- b/src/backend/access/gin/ginfast.c *************** *** 30,41 **** #define GIN_PAGE_FREESIZE \ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) ! typedef struct DatumArray { ! Datum *values; /* expansible array */ int32 nvalues; /* current number of valid entries */ ! int32 maxvalues; /* allocated size of array */ ! } DatumArray; /* --- 30,42 ---- #define GIN_PAGE_FREESIZE \ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) ) ! typedef struct KeyArray { ! Datum *keys; /* expansible array */ ! GinNullCategory *categories; /* another expansible array */ int32 nvalues; /* current number of valid entries */ ! int32 maxvalues; /* allocated size of arrays */ ! } KeyArray; /* *************** writeListPage(Relation index, Buffer buf *** 88,95 **** GinPageGetOpaque(page)->rightlink = rightlink; /* ! * tail page may contain only the whole row(s) or final part of row placed ! * on previous pages */ if (rightlink == InvalidBlockNumber) { --- 89,97 ---- GinPageGetOpaque(page)->rightlink = rightlink; /* ! * tail page may contain only whole row(s) or final part of row placed ! * on previous pages (a "row" here meaning all the index tuples generated ! * for one heap tuple) */ if (rightlink == InvalidBlockNumber) { *************** makeSublist(Relation index, IndexTuple * *** 210,222 **** } /* ! * Inserts collected values during normal insertion. Function guarantees ! * that all values of heap will be stored sequentially, preserving order */ void ! ginHeapTupleFastInsert(Relation index, GinState *ginstate, ! GinTupleCollector *collector) { Buffer metabuffer; Page metapage; GinMetaPageData *metadata = NULL; --- 212,227 ---- } /* ! * Write the index tuples contained in *collector into the index's ! * pending list. ! * ! * Function guarantees that all these tuples will be inserted consecutively, ! * preserving order */ void ! ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) { + Relation index = ginstate->index; Buffer metabuffer; Page metapage; GinMetaPageData *metadata = NULL; *************** ginHeapTupleFastInsert(Relation index, G *** 421,454 **** END_CRIT_SECTION(); if (needCleanup) ! ginInsertCleanup(index, ginstate, false, NULL); } /* ! * Collect values from one tuples to be indexed. All values for ! * one tuples should be written at once - to guarantee consistent state */ ! uint32 ! ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector, ! OffsetNumber attnum, Datum value, ItemPointer item) { Datum *entries; int32 i, nentries; ! entries = ginExtractEntriesSU(ginstate, attnum, value, &nentries); ! ! if (nentries == 0) ! /* nothing to insert */ ! return 0; /* * Allocate/reallocate memory for storing collected tuples */ if (collector->tuples == NULL) { ! collector->lentuples = nentries * index->rd_att->natts; collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples); } --- 426,465 ---- END_CRIT_SECTION(); if (needCleanup) ! ginInsertCleanup(ginstate, false, NULL); } /* ! * Create temporary index tuples for a single indexable item (one index column ! * for the heap tuple specified by ht_ctid), and append them to the array ! * in *collector. They will subsequently be written out using ! * ginHeapTupleFastInsert. Note that to guarantee consistent state, all ! * temp tuples for a given heap tuple must be written in one call to ! * ginHeapTupleFastInsert. */ ! void ! ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, ! OffsetNumber attnum, Datum value, bool isNull, ! ItemPointer ht_ctid) { Datum *entries; + GinNullCategory *categories; int32 i, nentries; ! /* ! * Extract the key values that need to be inserted in the index ! */ ! entries = ginExtractEntries(ginstate, attnum, value, isNull, ! &nentries, &categories); /* * Allocate/reallocate memory for storing collected tuples */ if (collector->tuples == NULL) { ! collector->lentuples = nentries * ginstate->origTupdesc->natts; collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) * collector->lentuples); } *************** ginHeapTupleFastCollect(Relation index, *** 460,478 **** } /* ! * Creates tuple's array */ for (i = 0; i < nentries; i++) { ! collector->tuples[collector->ntuples + i] = ! GinFormTuple(index, ginstate, attnum, entries[i], NULL, 0, true); ! collector->tuples[collector->ntuples + i]->t_tid = *item; ! collector->sumsize += IndexTupleSize(collector->tuples[collector->ntuples + i]); ! } ! ! collector->ntuples += nentries; ! return nentries; } /* --- 471,489 ---- } /* ! * Build an index tuple for each key value, and add to array. In ! * pending tuples we just stick the heap TID into t_tid. */ for (i = 0; i < nentries; i++) { ! IndexTuple itup; ! itup = GinFormTuple(ginstate, attnum, entries[i], categories[i], ! NULL, 0, true); ! itup->t_tid = *ht_ctid; ! collector->tuples[collector->ntuples++] = itup; ! collector->sumsize += IndexTupleSize(itup); ! } } /* *************** shiftList(Relation index, Buffer metabuf *** 591,628 **** return false; } ! /* Add datum to DatumArray, resizing if needed */ static void ! addDatum(DatumArray *datums, Datum datum) { ! if (datums->nvalues >= datums->maxvalues) { ! datums->maxvalues *= 2; ! datums->values = (Datum *) repalloc(datums->values, ! sizeof(Datum) * datums->maxvalues); } ! datums->values[datums->nvalues++] = datum; } /* ! * Go through all tuples >= startoff on page and collect values in memory * ! * Note that da is just workspace --- it does not carry any state across * calls. */ static void ! processPendingPage(BuildAccumulator *accum, DatumArray *da, Page page, OffsetNumber startoff) { ItemPointerData heapptr; OffsetNumber i, maxoff; ! OffsetNumber attrnum, ! curattnum; ! /* reset *da to empty */ ! da->nvalues = 0; maxoff = PageGetMaxOffsetNumber(page); Assert(maxoff >= FirstOffsetNumber); --- 602,656 ---- return false; } ! /* Initialize empty KeyArray */ static void ! initKeyArray(KeyArray *keys, int32 maxvalues) { ! keys->keys = (Datum *) palloc(sizeof(Datum) * maxvalues); ! keys->categories = (GinNullCategory *) ! palloc(sizeof(GinNullCategory) * maxvalues); ! keys->nvalues = 0; ! keys->maxvalues = maxvalues; ! } ! ! /* Add datum to KeyArray, resizing if needed */ ! static void ! addDatum(KeyArray *keys, Datum datum, GinNullCategory category) ! { ! if (keys->nvalues >= keys->maxvalues) { ! keys->maxvalues *= 2; ! keys->keys = (Datum *) ! repalloc(keys->keys, sizeof(Datum) * keys->maxvalues); ! keys->categories = (GinNullCategory *) ! repalloc(keys->categories, sizeof(GinNullCategory) * keys->maxvalues); } ! keys->keys[keys->nvalues] = datum; ! keys->categories[keys->nvalues] = category; ! keys->nvalues++; } /* ! * Collect data from a pending-list page in preparation for insertion into ! * the main index. * ! * Go through all tuples >= startoff on page and collect values in accum ! * ! * Note that ka is just workspace --- it does not carry any state across * calls. */ static void ! processPendingPage(BuildAccumulator *accum, KeyArray *ka, Page page, OffsetNumber startoff) { ItemPointerData heapptr; OffsetNumber i, maxoff; ! OffsetNumber attrnum; ! /* reset *ka to empty */ ! ka->nvalues = 0; maxoff = PageGetMaxOffsetNumber(page); Assert(maxoff >= FirstOffsetNumber); *************** processPendingPage(BuildAccumulator *acc *** 632,638 **** --- 660,670 ---- for (i = startoff; i <= maxoff; i = OffsetNumberNext(i)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + OffsetNumber curattnum; + Datum curkey; + GinNullCategory curcategory; + /* Check for change of heap TID or attnum */ curattnum = gintuple_get_attrnum(accum->ginstate, itup); if (!ItemPointerIsValid(&heapptr)) *************** processPendingPage(BuildAccumulator *acc *** 644,661 **** curattnum == attrnum)) { /* ! * We can insert several datums per call, but only for one heap ! * tuple and one column. */ ! ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); ! da->nvalues = 0; heapptr = itup->t_tid; attrnum = curattnum; } ! addDatum(da, gin_index_getattr(accum->ginstate, itup)); } ! ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); } /* --- 676,700 ---- curattnum == attrnum)) { /* ! * ginInsertBAEntries can insert several datums per call, but only ! * for one heap tuple and one column. So call it at a boundary, ! * and reset ka. */ ! ginInsertBAEntries(accum, &heapptr, attrnum, ! ka->keys, ka->categories, ka->nvalues); ! ka->nvalues = 0; heapptr = itup->t_tid; attrnum = curattnum; } ! ! /* Add key to KeyArray */ ! curkey = gintuple_get_key(accum->ginstate, itup, &curcategory); ! addDatum(ka, curkey, curcategory); } ! /* Dump out all remaining keys */ ! ginInsertBAEntries(accum, &heapptr, attrnum, ! ka->keys, ka->categories, ka->nvalues); } /* *************** processPendingPage(BuildAccumulator *acc *** 679,687 **** * If stats isn't null, we count deleted pending pages into the counts. */ void ! ginInsertCleanup(Relation index, GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats) { Buffer metabuffer, buffer; Page metapage, --- 718,727 ---- * If stats isn't null, we count deleted pending pages into the counts. */ void ! ginInsertCleanup(GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats) { + Relation index = ginstate->index; Buffer metabuffer, buffer; Page metapage, *************** ginInsertCleanup(Relation index, GinStat *** 690,696 **** MemoryContext opCtx, oldCtx; BuildAccumulator accum; ! DatumArray datums; BlockNumber blkno; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); --- 730,736 ---- MemoryContext opCtx, oldCtx; BuildAccumulator accum; ! KeyArray datums; BlockNumber blkno; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); *************** ginInsertCleanup(Relation index, GinStat *** 726,735 **** oldCtx = MemoryContextSwitchTo(opCtx); ! datums.maxvalues = 128; ! datums.nvalues = 0; ! datums.values = (Datum *) palloc(sizeof(Datum) * datums.maxvalues); ! ginInitBA(&accum); accum.ginstate = ginstate; --- 766,772 ---- oldCtx = MemoryContextSwitchTo(opCtx); ! initKeyArray(&datums, 128); ginInitBA(&accum); accum.ginstate = ginstate; *************** ginInsertCleanup(Relation index, GinStat *** 748,754 **** } /* ! * read page's datums into memory */ processPendingPage(&accum, &datums, page, FirstOffsetNumber); --- 785,791 ---- } /* ! * read page's datums into accum */ processPendingPage(&accum, &datums, page, FirstOffsetNumber); *************** ginInsertCleanup(Relation index, GinStat *** 769,775 **** { ItemPointerData *list; uint32 nlist; ! Datum entry; OffsetNumber maxoff, attnum; --- 806,813 ---- { ItemPointerData *list; uint32 nlist; ! Datum key; ! GinNullCategory category; OffsetNumber maxoff, attnum; *************** ginInsertCleanup(Relation index, GinStat *** 787,795 **** * list. */ ginBeginBAScan(&accum); ! while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) { ! ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); if (vac_delay) vacuum_delay_point(); } --- 825,835 ---- * list. */ ginBeginBAScan(&accum); ! while ((list = ginGetBAEntry(&accum, ! &attnum, &key, &category, &nlist)) != NULL) { ! ginEntryInsert(ginstate, attnum, key, category, ! list, nlist, NULL); if (vac_delay) vacuum_delay_point(); } *************** ginInsertCleanup(Relation index, GinStat *** 822,829 **** processPendingPage(&accum, &datums, page, maxoff + 1); ginBeginBAScan(&accum); ! while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL) ! ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL); } /* --- 862,871 ---- processPendingPage(&accum, &datums, page, maxoff + 1); ginBeginBAScan(&accum); ! while ((list = ginGetBAEntry(&accum, ! &attnum, &key, &category, &nlist)) != NULL) ! ginEntryInsert(ginstate, attnum, key, category, ! list, nlist, NULL); } /* *************** ginInsertCleanup(Relation index, GinStat *** 857,865 **** * release memory used so far and reinit state */ MemoryContextReset(opCtx); ginInitBA(&accum); - datums.nvalues = 0; - datums.values = (Datum *) palloc(sizeof(Datum) * datums.maxvalues); } else { --- 899,906 ---- * release memory used so far and reinit state */ MemoryContextReset(opCtx); + initKeyArray(&datums, datums.maxvalues); ginInitBA(&accum); } else { diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index ed1e71c9d6f291e9d425b65ed1ec28d42b2cd980..19528b2e30ff4a069cf65d0ffdd7c7b0e23c3a53 100644 *** a/src/backend/access/gin/ginget.c --- b/src/backend/access/gin/ginget.c *************** typedef struct pendingPosition *** 33,43 **** } pendingPosition; /* ! * Tries to refind previously taken ItemPointer on page. */ static bool ! findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; --- 33,74 ---- } pendingPosition; + /* convenience function for invoking a key's consistentFn */ + static inline bool + callConsistentFn(GinState *ginstate, GinScanKey key) + { + /* + * If we're dealing with a dummy EMPTY_QUERY key, we don't want to call + * the consistentFn; just claim it matches. + */ + if (key->strategy == InvalidStrategy) + { + key->recheckCurItem = false; + return true; + } + + /* + * Initialize recheckCurItem in case the consistentFn doesn't know it + * should set it. The safe assumption in that case is to force recheck. + */ + key->recheckCurItem = true; + + return DatumGetBool(FunctionCall8(&ginstate->consistentFn[key->attnum - 1], + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(&key->recheckCurItem), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); + } + /* ! * Tries to refind previously taken ItemPointer on a posting page. */ static bool ! findItemInPostingPage(Page page, ItemPointer item, OffsetNumber *off) { OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; int res; *************** moveRightIfItNeeded(GinBtreeData *btree, *** 79,85 **** return false; /* no more pages */ LockBuffer(stack->buffer, GIN_UNLOCK); ! stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); LockBuffer(stack->buffer, GIN_SHARE); stack->off = FirstOffsetNumber; } --- 110,118 ---- return false; /* no more pages */ LockBuffer(stack->buffer, GIN_UNLOCK); ! stack->buffer = ReleaseAndReadBuffer(stack->buffer, ! btree->index, ! stack->blkno); LockBuffer(stack->buffer, GIN_SHARE); stack->off = FirstOffsetNumber; } *************** moveRightIfItNeeded(GinBtreeData *btree, *** 88,104 **** } /* ! * Does fullscan of posting tree and saves ItemPointers ! * in scanEntry->partialMatch TIDBitmap */ static void ! scanForItems(Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree) { GinPostingTreeScan *gdi; Buffer buffer; Page page; BlockNumber blkno; gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE); buffer = ginScanBeginPostingTree(gdi); --- 121,139 ---- } /* ! * Scan all pages of a posting tree and save all its heap ItemPointers ! * in scanEntry->matchBitmap */ static void ! scanPostingTree(Relation index, GinScanEntry scanEntry, ! BlockNumber rootPostingTree) { GinPostingTreeScan *gdi; Buffer buffer; Page page; BlockNumber blkno; + /* Descend to the leftmost leaf page */ gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE); buffer = ginScanBeginPostingTree(gdi); *************** scanForItems(Relation index, GinScanEntr *** 108,158 **** pfree(gdi); /* ! * Goes through all leaves */ for (;;) { page = BufferGetPage(buffer); ! if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) { ! tbm_add_tuples(scanEntry->partialMatch, (ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff, false); scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; } - blkno = GinPageGetOpaque(page)->rightlink; if (GinPageRightMost(page)) ! { ! UnlockReleaseBuffer(buffer); ! return; /* no more pages */ ! } LockBuffer(buffer, GIN_UNLOCK); buffer = ReleaseAndReadBuffer(buffer, index, blkno); LockBuffer(buffer, GIN_SHARE); } } /* ! * Collects all ItemPointer into the TIDBitmap struct ! * for entries partially matched to search entry. * ! * Returns true if done, false if it's needed to restart scan from scratch */ static bool ! computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry) { ! Page page; ! IndexTuple itup; ! Datum idatum; ! int32 cmp; ! scanEntry->partialMatch = tbm_create(work_mem * 1024L); for (;;) { /* * stack->off points to the interested entry, buffer is already locked */ --- 143,207 ---- pfree(gdi); /* ! * Loop iterates through all leaf pages of posting tree */ for (;;) { page = BufferGetPage(buffer); ! if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && ! GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber) { ! tbm_add_tuples(scanEntry->matchBitmap, (ItemPointer) GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff, false); scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; } if (GinPageRightMost(page)) ! break; /* no more pages */ + blkno = GinPageGetOpaque(page)->rightlink; LockBuffer(buffer, GIN_UNLOCK); buffer = ReleaseAndReadBuffer(buffer, index, blkno); LockBuffer(buffer, GIN_SHARE); } + + UnlockReleaseBuffer(buffer); } /* ! * Collects TIDs into scanEntry->matchBitmap for all heap tuples that ! * partial-match the search entry. * ! * Returns true if done, false if it's necessary to restart scan from scratch */ static bool ! computePartialMatchList(GinBtreeData *btree, GinBtreeStack *stack, ! GinScanEntry scanEntry) { ! OffsetNumber attnum; ! Form_pg_attribute attr; ! /* Initialize empty bitmap result */ ! scanEntry->matchBitmap = tbm_create(work_mem * 1024L); ! ! /* Null query cannot partial-match anything */ ! if (scanEntry->queryCategory != GIN_CAT_NORM_KEY) ! return true; ! ! /* Locate tupdesc entry for key column (for attbyval/attlen data) */ ! attnum = scanEntry->parent->attnum; ! attr = btree->ginstate->origTupdesc->attrs[attnum - 1]; for (;;) { + Page page; + IndexTuple itup; + Datum idatum; + GinNullCategory icategory; + int32 cmp; + /* * stack->off points to the interested entry, buffer is already locked */ *************** computePartialMatchList(GinBtreeData *bt *** 165,175 **** /* * If tuple stores another attribute then stop scan */ ! if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum) return true; ! idatum = gin_index_getattr(btree->ginstate, itup); /*---------- * Check of partial match. --- 214,230 ---- /* * If tuple stores another attribute then stop scan */ ! if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) return true; ! idatum = gintuple_get_key(btree->ginstate, itup, &icategory); + /* + * if key is null (including placeholders), stop scan; we assume + * partial matches never match nulls + */ + if (icategory != GIN_CAT_NORM_KEY) + return true; /*---------- * Check of partial match. *************** computePartialMatchList(GinBtreeData *bt *** 178,187 **** * case cmp < 0 => not match and continue scan *---------- */ ! cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[scanEntry->attnum - 1], ! scanEntry->entry, idatum, ! UInt16GetDatum(scanEntry->strategy), PointerGetDatum(scanEntry->extra_data))); if (cmp > 0) --- 233,242 ---- * case cmp < 0 => not match and continue scan *---------- */ ! cmp = DatumGetInt32(FunctionCall4(&btree->ginstate->comparePartialFn[attnum - 1], ! scanEntry->queryKey, idatum, ! UInt16GetDatum(scanEntry->parent->strategy), PointerGetDatum(scanEntry->extra_data))); if (cmp > 0) *************** computePartialMatchList(GinBtreeData *bt *** 195,216 **** if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); - Datum newDatum, - savedDatum = datumCopy( - idatum, - btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval, - btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attlen - ); /* * We should unlock current page (but not unpin) during tree scan * to prevent deadlock with vacuum processes. * ! * We save current entry value (savedDatum) to be able to refind * our tuple after re-locking */ LockBuffer(stack->buffer, GIN_UNLOCK); ! scanForItems(btree->index, scanEntry, rootPostingTree); /* * We lock again the entry page and while it was unlocked insert --- 250,270 ---- if (GinIsPostingTree(itup)) { BlockNumber rootPostingTree = GinGetPostingTree(itup); /* * We should unlock current page (but not unpin) during tree scan * to prevent deadlock with vacuum processes. * ! * We save current entry value (idatum) to be able to re-find * our tuple after re-locking */ + if (icategory == GIN_CAT_NORM_KEY) + idatum = datumCopy(idatum, attr->attbyval, attr->attlen); + LockBuffer(stack->buffer, GIN_UNLOCK); ! ! /* Collect all the TIDs in this entry's posting tree */ ! scanPostingTree(btree->index, scanEntry, rootPostingTree); /* * We lock again the entry page and while it was unlocked insert *************** computePartialMatchList(GinBtreeData *bt *** 222,266 **** { /* * Root page becomes non-leaf while we unlock it. We will ! * start again, this situation doesn't cause often - root can ! * became a non-leaf only one per life of index. */ return false; } for (;;) { if (moveRightIfItNeeded(btree, stack) == false) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ page = BufferGetPage(stack->buffer); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); - newDatum = gin_index_getattr(btree->ginstate, itup); ! if (gintuple_get_attrnum(btree->ginstate, itup) != scanEntry->attnum) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ ! if (ginCompareEntries(btree->ginstate, scanEntry->attnum, ! newDatum, savedDatum) == 0) ! { ! /* Found! */ ! if (btree->ginstate->origTupdesc->attrs[scanEntry->attnum - 1]->attbyval == false) ! pfree(DatumGetPointer(savedDatum)); ! break; ! } stack->off++; } } else { ! tbm_add_tuples(scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false); scanEntry->predictNumberResult += GinGetNPosting(itup); } /* ! * Ok, we save ItemPointers, go to the next entry */ stack->off++; } --- 276,454 ---- { /* * Root page becomes non-leaf while we unlock it. We will ! * start again, this situation doesn't occur often - root can ! * became a non-leaf only once per life of index. */ + return false; + } + + /* Search forward to re-find idatum */ + for (;;) + { + Datum newDatum; + GinNullCategory newCategory; + + if (moveRightIfItNeeded(btree, stack) == false) + elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + + if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) + elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + newDatum = gintuple_get_key(btree->ginstate, itup, + &newCategory); + + if (ginCompareEntries(btree->ginstate, attnum, + newDatum, newCategory, + idatum, icategory) == 0) + break; /* Found! */ + + stack->off++; + } + + if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval) + pfree(DatumGetPointer(idatum)); + } + else + { + tbm_add_tuples(scanEntry->matchBitmap, + GinGetPosting(itup), GinGetNPosting(itup), false); + scanEntry->predictNumberResult += GinGetNPosting(itup); + } + + /* + * OK, go to the next entry + */ + stack->off++; + } + + return true; + } + + /* + * Collects TIDs into scanEntry->matchBitmap for all heap tuples + * starting from the stack's scan point (which should be the leftmost key + * since we want to collect all index entries). + * + * This is just a lobotomized version of computePartialMatchList + * that doesn't do any partial-match comparisons. + * + * Returns true if done, false if it's necessary to restart scan from scratch + */ + static bool + computeFullMatchList(GinBtreeData *btree, GinBtreeStack *stack, + GinScanEntry scanEntry) + { + OffsetNumber attnum; + Form_pg_attribute attr; + + Assert(scanEntry->queryCategory == GIN_CAT_EMPTY_QUERY); + + /* Initialize empty bitmap result */ + scanEntry->matchBitmap = tbm_create(work_mem * 1024L); + + /* Locate tupdesc entry for key column (for attbyval/attlen data) */ + attnum = scanEntry->parent->attnum; + attr = btree->ginstate->origTupdesc->attrs[attnum - 1]; + + for (;;) + { + Page page; + IndexTuple itup; + + /* + * stack->off points to the interested entry, buffer is already locked + */ + if (moveRightIfItNeeded(btree, stack) == false) + return true; + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + + /* + * If tuple stores another attribute then stop scan; otherwise, + * we will collect all TIDs for this attribute (including null + * placeholders). + */ + if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) + return true; + + if (GinIsPostingTree(itup)) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + Datum idatum; + GinNullCategory icategory; + + /* + * We should unlock current page (but not unpin) during tree scan + * to prevent deadlock with vacuum processes. + * + * We save current entry value (idatum) to be able to re-find + * our tuple after re-locking + */ + idatum = gintuple_get_key(btree->ginstate, itup, &icategory); + if (icategory == GIN_CAT_NORM_KEY) + idatum = datumCopy(idatum, attr->attbyval, attr->attlen); + + LockBuffer(stack->buffer, GIN_UNLOCK); + + /* Collect all the TIDs in this entry's posting tree */ + scanPostingTree(btree->index, scanEntry, rootPostingTree); + /* + * We lock again the entry page and while it was unlocked insert + * might occured, so we need to refind our position + */ + LockBuffer(stack->buffer, GIN_SHARE); + page = BufferGetPage(stack->buffer); + if (!GinPageIsLeaf(page)) + { + /* + * Root page becomes non-leaf while we unlock it. We will + * start again, this situation doesn't occur often - root can + * became a non-leaf only once per life of index. + */ return false; } + /* Search forward to re-find idatum */ for (;;) { + Datum newDatum; + GinNullCategory newCategory; + if (moveRightIfItNeeded(btree, stack) == false) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ page = BufferGetPage(stack->buffer); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); ! if (gintuple_get_attrnum(btree->ginstate, itup) != attnum) elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + newDatum = gintuple_get_key(btree->ginstate, itup, + &newCategory); ! if (ginCompareEntries(btree->ginstate, attnum, ! newDatum, newCategory, ! idatum, icategory) == 0) ! break; /* Found! */ stack->off++; } + + if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval) + pfree(DatumGetPointer(idatum)); } else { ! tbm_add_tuples(scanEntry->matchBitmap, ! GinGetPosting(itup), GinGetNPosting(itup), false); scanEntry->predictNumberResult += GinGetNPosting(itup); } /* ! * OK, go to the next entry */ stack->off++; } *************** computePartialMatchList(GinBtreeData *bt *** 272,290 **** * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void ! startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; ! bool needUnlock = TRUE; entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; ! entry->partialMatch = NULL; ! entry->partialMatchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; --- 460,479 ---- * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void ! startScanEntry(GinState *ginstate, GinScanEntry entry) { GinBtreeData btreeEntry; GinBtreeStack *stackEntry; Page page; ! bool needUnlock; + restartScanEntry: entry->buffer = InvalidBuffer; entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; ! entry->matchBitmap = NULL; ! entry->matchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; *************** startScanEntry(Relation index, GinState *** 299,316 **** * posting list in memory */ ! ginPrepareEntryScan(&btreeEntry, index, entry->attnum, entry->entry, ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); entry->isFinished = TRUE; if (entry->isPartialMatch) { /* ! * btreeEntry.findItem points to the first equal or greater value than ! * needed. So we will scan further and collect all ItemPointers */ btreeEntry.findItem(&btreeEntry, stackEntry); if (computePartialMatchList(&btreeEntry, stackEntry, entry) == false) --- 488,509 ---- * posting list in memory */ ! ginPrepareEntryScan(&btreeEntry, entry->parent->attnum, ! entry->queryKey, entry->queryCategory, ! ginstate); btreeEntry.searchMode = TRUE; stackEntry = ginFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); + needUnlock = TRUE; entry->isFinished = TRUE; if (entry->isPartialMatch) { /* ! * btreeEntry.findItem locates the first item >= given search key. ! * We scan forward from there and collect all TIDs until the ! * comparePartialFn says to stop scanning. */ btreeEntry.findItem(&btreeEntry, stackEntry); if (computePartialMatchList(&btreeEntry, stackEntry, entry) == false) *************** startScanEntry(Relation index, GinState *** 320,343 **** * found data and rescan. See comments near 'return false' in * computePartialMatchList() */ ! if (entry->partialMatch) { ! if (entry->partialMatchIterator) ! tbm_end_iterate(entry->partialMatchIterator); ! entry->partialMatchIterator = NULL; ! tbm_free(entry->partialMatch); ! entry->partialMatch = NULL; } LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); ! startScanEntry(index, ginstate, entry); ! return; } ! if (entry->partialMatch && !tbm_is_empty(entry->partialMatch)) { ! entry->partialMatchIterator = tbm_begin_iterate(entry->partialMatch); entry->isFinished = FALSE; } } --- 513,569 ---- * found data and rescan. See comments near 'return false' in * computePartialMatchList() */ ! if (entry->matchBitmap) { ! if (entry->matchIterator) ! tbm_end_iterate(entry->matchIterator); ! entry->matchIterator = NULL; ! tbm_free(entry->matchBitmap); ! entry->matchBitmap = NULL; } LockBuffer(stackEntry->buffer, GIN_UNLOCK); freeGinBtreeStack(stackEntry); + goto restartScanEntry; + } ! if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) ! { ! entry->matchIterator = tbm_begin_iterate(entry->matchBitmap); ! entry->isFinished = FALSE; ! } ! } ! else if (entry->queryCategory == GIN_CAT_EMPTY_QUERY) ! { ! /* ! * btreeEntry.findItem locates the first item >= given search key, ! * which will be the leftmost index item because of the way the ! * GIN_CAT_EMPTY_QUERY category code is assigned. ! * We scan forward from there and collect all TIDs in the index. ! */ ! btreeEntry.findItem(&btreeEntry, stackEntry); ! if (computeFullMatchList(&btreeEntry, stackEntry, entry) == false) ! { ! /* ! * GIN tree was seriously restructured, so we will cleanup all ! * found data and rescan. See comments near 'return false' in ! * computeFullMatchList() ! */ ! if (entry->matchBitmap) ! { ! if (entry->matchIterator) ! tbm_end_iterate(entry->matchIterator); ! entry->matchIterator = NULL; ! tbm_free(entry->matchBitmap); ! entry->matchBitmap = NULL; ! } ! LockBuffer(stackEntry->buffer, GIN_UNLOCK); ! freeGinBtreeStack(stackEntry); ! goto restartScanEntry; } ! if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap)) { ! entry->matchIterator = tbm_begin_iterate(entry->matchBitmap); entry->isFinished = FALSE; } } *************** startScanEntry(Relation index, GinState *** 352,358 **** Page page; /* ! * We should unlock entry page before make deal with posting tree * to prevent deadlocks with vacuum processes. Because entry is * never deleted from page and posting tree is never reduced to * the posting list, we can unlock page after getting BlockNumber --- 578,584 ---- Page page; /* ! * We should unlock entry page before touching posting tree * to prevent deadlocks with vacuum processes. Because entry is * never deleted from page and posting tree is never reduced to * the posting list, we can unlock page after getting BlockNumber *************** startScanEntry(Relation index, GinState *** 360,366 **** */ LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; ! gdi = ginPrepareScanPostingTree(index, rootPostingTree, TRUE); entry->buffer = ginScanBeginPostingTree(gdi); --- 586,592 ---- */ LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; ! gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, TRUE); entry->buffer = ginScanBeginPostingTree(gdi); *************** startScanEntry(Relation index, GinState *** 402,420 **** } static void ! startScanKey(Relation index, GinState *ginstate, GinScanKey key) { uint32 i; if (!key->firstCall) return; - for (i = 0; i < key->nentries; i++) - startScanEntry(index, ginstate, key->scanEntry + i); - key->isFinished = FALSE; key->firstCall = FALSE; if (GinFuzzySearchLimit > 0) { /* --- 628,666 ---- } static void ! startScanKey(GinState *ginstate, GinScanKey key) { uint32 i; if (!key->firstCall) return; key->isFinished = FALSE; key->firstCall = FALSE; + if (key->nentries == 0) + { + /* + * We are dealing with an empty query item that provides no useful + * scan constraint information. The only thing we really need from + * it is the recheck flag, which need be evaluated only once. + * + * If the consistentFn returns false for zero keys, we don't need to + * do the scan. (It'd be better if the extractQueryFn were coded to + * return -1 for such a case, but we must follow the protocol.) + */ + if (!callConsistentFn(ginstate, key)) + key->isFinished = TRUE; + + /* Keep its item pointer set at "max" */ + ItemPointerSetMax(&key->curItem); + + return; + } + + for (i = 0; i < key->nentries; i++) + startScanEntry(ginstate, key->scanEntry + i); + if (GinFuzzySearchLimit > 0) { /* *************** startScan(IndexScanDesc scan) *** 444,450 **** GinScanOpaque so = (GinScanOpaque) scan->opaque; for (i = 0; i < so->nkeys; i++) ! startScanKey(scan->indexRelation, &so->ginstate, so->keys + i); } /* --- 690,696 ---- GinScanOpaque so = (GinScanOpaque) scan->opaque; for (i = 0; i < so->nkeys; i++) ! startScanKey(&so->ginstate, so->keys + i); } /* *************** startScan(IndexScanDesc scan) *** 453,459 **** * to prevent interference with vacuum */ static void ! entryGetNextItem(Relation index, GinScanEntry entry) { Page page; BlockNumber blkno; --- 699,705 ---- * to prevent interference with vacuum */ static void ! entryGetNextItem(GinState *ginstate, GinScanEntry entry) { Page page; BlockNumber blkno; *************** entryGetNextItem(Relation index, GinScan *** 487,499 **** return; } ! entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno); LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; if (!ItemPointerIsValid(&entry->curItem) || ! findItemInPage(page, &entry->curItem, &entry->offset)) { /* * Found position equal to or greater than stored --- 733,747 ---- return; } ! entry->buffer = ReleaseAndReadBuffer(entry->buffer, ! ginstate->index, ! blkno); LockBuffer(entry->buffer, GIN_SHARE); page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; if (!ItemPointerIsValid(&entry->curItem) || ! findItemInPostingPage(page, &entry->curItem, &entry->offset)) { /* * Found position equal to or greater than stored *************** entryGetNextItem(Relation index, GinScan *** 512,518 **** * First pages are deleted or empty, or we found exact * position, so break inner loop and continue outer one. */ - break; } --- 760,765 ---- *************** entryGetNextItem(Relation index, GinScan *** 527,551 **** } } - /* convenience function for invoking a key's consistentFn */ - static inline bool - callConsistentFn(GinState *ginstate, GinScanKey key) - { - /* - * Initialize recheckCurItem in case the consistentFn doesn't know it - * should set it. The safe assumption in that case is to force recheck. - */ - key->recheckCurItem = true; - - return DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1], - PointerGetDatum(key->entryRes), - UInt16GetDatum(key->strategy), - key->query, - UInt32GetDatum(key->nentries), - PointerGetDatum(key->extra_data), - PointerGetDatum(&key->recheckCurItem))); - } - #define gin_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE)) #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) --- 774,779 ---- *************** callConsistentFn(GinState *ginstate, Gin *** 563,569 **** * current implementation this is guaranteed by the behavior of tidbitmaps. */ static void ! entryGetItem(Relation index, GinScanEntry entry) { Assert(!entry->isFinished); --- 791,797 ---- * current implementation this is guaranteed by the behavior of tidbitmaps. */ static void ! entryGetItem(GinState *ginstate, GinScanEntry entry) { Assert(!entry->isFinished); *************** entryGetItem(Relation index, GinScanEntr *** 572,612 **** entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } ! else if (entry->partialMatch) { do { ! if (entry->partialMatchResult == NULL || ! entry->offset >= entry->partialMatchResult->ntuples) { ! entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator); ! if (entry->partialMatchResult == NULL) { ItemPointerSetInvalid(&entry->curItem); ! tbm_end_iterate(entry->partialMatchIterator); ! entry->partialMatchIterator = NULL; entry->isFinished = TRUE; break; } /* ! * reset counter to the beginning of ! * entry->partialMatchResult. Note: entry->offset is still ! * greater than partialMatchResult->ntuples if ! * partialMatchResult is lossy. So, on next call we will get ! * next result from TIDBitmap. */ entry->offset = 0; } ! if (entry->partialMatchResult->ntuples < 0) { /* * lossy result, so we need to check the whole page */ ItemPointerSetLossyPage(&entry->curItem, ! entry->partialMatchResult->blockno); /* * We might as well fall out of the loop; we could not --- 800,839 ---- entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } ! else if (entry->matchBitmap) { do { ! if (entry->matchResult == NULL || ! entry->offset >= entry->matchResult->ntuples) { ! entry->matchResult = tbm_iterate(entry->matchIterator); ! if (entry->matchResult == NULL) { ItemPointerSetInvalid(&entry->curItem); ! tbm_end_iterate(entry->matchIterator); ! entry->matchIterator = NULL; entry->isFinished = TRUE; break; } /* ! * Reset counter to the beginning of entry->matchResult. ! * Note: entry->offset is still greater than ! * matchResult->ntuples if matchResult is lossy. So, on next ! * call we will get next result from TIDBitmap. */ entry->offset = 0; } ! if (entry->matchResult->ntuples < 0) { /* * lossy result, so we need to check the whole page */ ItemPointerSetLossyPage(&entry->curItem, ! entry->matchResult->blockno); /* * We might as well fall out of the loop; we could not *************** entryGetItem(Relation index, GinScanEntr *** 617,624 **** } ItemPointerSet(&entry->curItem, ! entry->partialMatchResult->blockno, ! entry->partialMatchResult->offsets[entry->offset]); entry->offset++; } while (entry->reduceResult == TRUE && dropItem(entry)); } --- 844,851 ---- } ItemPointerSet(&entry->curItem, ! entry->matchResult->blockno, ! entry->matchResult->offsets[entry->offset]); entry->offset++; } while (entry->reduceResult == TRUE && dropItem(entry)); } *************** entryGetItem(Relation index, GinScanEntr *** 637,643 **** { do { ! entryGetNextItem(index, entry); } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); --- 864,870 ---- { do { ! entryGetNextItem(ginstate, entry); } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); *************** entryGetItem(Relation index, GinScanEntr *** 662,668 **** * logic in scanGetItem.) */ static void ! keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointer advancePast) { ItemPointerData myAdvancePast = *advancePast; --- 889,895 ---- * logic in scanGetItem.) */ static void ! keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key, ItemPointer advancePast) { ItemPointerData myAdvancePast = *advancePast; *************** keyGetItem(Relation index, GinState *gin *** 676,681 **** --- 903,912 ---- Assert(!key->isFinished); + /* If this is an empty query, startScanKey did everything needed */ + if (key->nentries == 0) + return; + do { /* *************** keyGetItem(Relation index, GinState *gin *** 701,707 **** while (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &myAdvancePast) <= 0) ! entryGetItem(index, entry); if (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &key->curItem) < 0) --- 932,938 ---- while (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &myAdvancePast) <= 0) ! entryGetItem(ginstate, entry); if (entry->isFinished == FALSE && ginCompareItemPointers(&entry->curItem, &key->curItem) < 0) *************** keyGetItem(Relation index, GinState *gin *** 839,851 **** } while (!res); } /* * Get ItemPointer of next heap row to be checked from pending list. ! * Returns false if there are no more. On pages with several rows * it returns each row separately, on page with part of heap row returns ! * per page data. pos->firstOffset and pos->lastOffset points ! * fraction of tuples for current heap row. * * The pendingBuffer is presumed pinned and share-locked on entry, and is * pinned and share-locked on success exit. On failure exit it's released. --- 1070,1211 ---- } while (!res); } + /* + * Get next heap item pointer (after advancePast) from scan. + * Returns true if anything found. + * On success, *item and *recheck are set. + * + * Note: this is very nearly the same logic as in keyGetItem(), except + * that we know the keys are to be combined with AND logic, whereas in + * keyGetItem() the combination logic is known only to the consistentFn. + */ + static bool + scanGetItem(IndexScanDesc scan, ItemPointer advancePast, + ItemPointerData *item, bool *recheck) + { + GinScanOpaque so = (GinScanOpaque) scan->opaque; + ItemPointerData myAdvancePast = *advancePast; + uint32 i; + bool match; + + for (;;) + { + /* + * Advance any keys that are <= myAdvancePast. In particular, + * since key->curItem was initialized with ItemPointerSetMin, this + * ensures we fetch the first item for each key on the first call. + * Then set *item to the minimum of the key curItems. + * + * Note: a lossy-page entry is encoded by a ItemPointer with max value + * for offset (0xffff), so that it will sort after any exact entries + * for the same page. So we'll prefer to return exact pointers not + * lossy pointers, which is good. Also, when we advance past an exact + * entry after processing it, we will not advance past lossy entries + * for the same page in other keys, which is NECESSARY for correct + * results (since we might have additional entries for the same page + * in the first key). + */ + ItemPointerSetMax(item); + + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + while (key->isFinished == FALSE && + ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0) + keyGetItem(&so->ginstate, so->tempCtx, + key, &myAdvancePast); + + if (key->isFinished) + return FALSE; /* finished one of keys */ + + if (ginCompareItemPointers(&key->curItem, item) < 0) + *item = key->curItem; + } + + Assert(!ItemPointerIsMax(item)); + + /*---------- + * Now *item contains first ItemPointer after previous result. + * + * The item is a valid hit only if all the keys returned either + * that exact TID, or a lossy reference to the same page. + * + * This logic works only if a keyGetItem stream can never contain both + * exact and lossy pointers for the same page. Else we could have a + * case like + * + * stream 1 stream 2 + * ... ... + * 42/6 42/7 + * 50/1 42/0xffff + * ... ... + * + * We would conclude that 42/6 is not a match and advance stream 1, + * thus never detecting the match to the lossy pointer in stream 2. + * (keyGetItem has a similar problem versus entryGetItem.) + *---------- + */ + match = true; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + /* empty queries don't produce tuples and don't affect result */ + if (key->nentries == 0) + continue; + + if (ginCompareItemPointers(item, &key->curItem) == 0) + continue; + if (ItemPointerIsLossyPage(&key->curItem) && + GinItemPointerGetBlockNumber(&key->curItem) == + GinItemPointerGetBlockNumber(item)) + continue; + + match = false; + break; + } + + if (match) + break; + + /* + * No hit. Update myAdvancePast to this TID, so that on the next + * pass we'll move to the next possible entry. + */ + myAdvancePast = *item; + } + + /* + * We must return recheck = true if any of the keys are marked recheck. + */ + *recheck = false; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (key->recheckCurItem) + { + *recheck = true; + break; + } + } + + return TRUE; + } + + + /* + * Functions for scanning the pending list + */ + /* * Get ItemPointer of next heap row to be checked from pending list. ! * Returns false if there are no more. On pages with several heap rows * it returns each row separately, on page with part of heap row returns ! * per page data. pos->firstOffset and pos->lastOffset are set to identify ! * the range of pending-list tuples belonging to this heap row. * * The pendingBuffer is presumed pinned and share-locked on entry, and is * pinned and share-locked on success exit. On failure exit it's released. *************** scanGetCandidate(IndexScanDesc scan, pen *** 917,926 **** /* * Now pos->firstOffset points to the first tuple of current heap ! * row, pos->lastOffset points to the first tuple of second heap * row (or to the end of page) */ - break; } } --- 1277,1285 ---- /* * Now pos->firstOffset points to the first tuple of current heap ! * row, pos->lastOffset points to the first tuple of next heap * row (or to the end of page) */ break; } } *************** scanGetCandidate(IndexScanDesc scan, pen *** 929,963 **** } /* ! * Scan page from current tuple (off) up till the first of: * - match is found (then returns true) * - no later match is possible * - tuple's attribute number is not equal to entry's attrnum * - reach end of page */ static bool matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, OffsetNumber maxoff, ! Datum value, OffsetNumber attrnum, ! Datum *datum, bool *datumExtracted, ! StrategyNumber strategy, ! Pointer extra_data) { IndexTuple itup; int32 cmp; while (off < maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ! if (attrnum != gintuple_get_attrnum(ginstate, itup)) return false; if (datumExtracted[off - 1] == false) { ! datum[off - 1] = gin_index_getattr(ginstate, itup); datumExtracted[off - 1] = true; } /*---------- * Check partial match. * case cmp == 0 => match --- 1288,1334 ---- } /* ! * Scan pending-list page from current tuple (off) up till the first of: * - match is found (then returns true) * - no later match is possible * - tuple's attribute number is not equal to entry's attrnum * - reach end of page + * + * datum[]/category[]/datumExtracted[] arrays are used to cache the results + * of gintuple_get_key() on the current page. */ static bool matchPartialInPendingList(GinState *ginstate, Page page, OffsetNumber off, OffsetNumber maxoff, ! GinScanEntry entry, ! Datum *datum, GinNullCategory *category, ! bool *datumExtracted) { IndexTuple itup; int32 cmp; + /* Partial match to a null is not possible */ + if (entry->queryCategory != GIN_CAT_NORM_KEY) + return false; + while (off < maxoff) { itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); ! ! if (gintuple_get_attrnum(ginstate, itup) != entry->parent->attnum) return false; if (datumExtracted[off - 1] == false) { ! datum[off - 1] = gintuple_get_key(ginstate, itup, ! &category[off - 1]); datumExtracted[off - 1] = true; } + /* Once we hit nulls, no further match is possible */ + if (category[off - 1] != GIN_CAT_NORM_KEY) + return false; + /*---------- * Check partial match. * case cmp == 0 => match *************** matchPartialInPendingList(GinState *gins *** 965,975 **** * case cmp < 0 => not match and continue scan *---------- */ ! cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[attrnum - 1], ! value, datum[off - 1], ! UInt16GetDatum(strategy), ! PointerGetDatum(extra_data))); if (cmp == 0) return true; else if (cmp > 0) --- 1336,1346 ---- * case cmp < 0 => not match and continue scan *---------- */ ! cmp = DatumGetInt32(FunctionCall4(&ginstate->comparePartialFn[entry->parent->attnum - 1], ! entry->queryKey, datum[off - 1], ! UInt16GetDatum(entry->parent->strategy), ! PointerGetDatum(entry->extra_data))); if (cmp == 0) return true; else if (cmp > 0) *************** matchPartialInPendingList(GinState *gins *** 981,1007 **** return false; } - static bool - hasAllMatchingKeys(GinScanOpaque so, pendingPosition *pos) - { - int i; - - for (i = 0; i < so->nkeys; i++) - if (pos->hasMatchKey[i] == false) - return false; - - return true; - } - /* ! * Sets entryRes array for each key by looking at ! * every entry per indexed value (heap's row) in pending list. ! * returns true if at least one of datum was matched by key's entry * * The pendingBuffer is presumed pinned and share-locked on entry. */ static bool ! collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) { GinScanOpaque so = (GinScanOpaque) scan->opaque; OffsetNumber attrnum; --- 1352,1371 ---- return false; } /* ! * Set up the entryRes array for each key by looking at ! * every entry for current heap row in pending list. ! * ! * Returns true if each scan key has at least one entryRes match. ! * This corresponds to the situations where the normal index search will ! * try to apply the key's consistentFn. (A tuple not meeting that requirement ! * cannot be returned by the normal search since no entry stream will ! * source its TID.) * * The pendingBuffer is presumed pinned and share-locked on entry. */ static bool ! collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) { GinScanOpaque so = (GinScanOpaque) scan->opaque; OffsetNumber attrnum; *************** collectDatumForItem(IndexScanDesc scan, *** 1011,1033 **** j; /* ! * Reset entryRes */ for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; memset(key->entryRes, FALSE, key->nentries); } - memset(pos->hasMatchKey, FALSE, so->nkeys); for (;;) { Datum datum[BLCKSZ / sizeof(IndexTupleData)]; bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)]; Assert(pos->lastOffset > pos->firstOffset); ! memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset)); page = BufferGetPage(pos->pendingBuffer); --- 1375,1405 ---- j; /* ! * Reset entryRes and hasMatchKey. For a full-scan key (nentries==0), ! * set hasMatchKey true because the inner loop below won't be iterated ! * and we don't want such a key to cause us to return "false" at the end. */ for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; memset(key->entryRes, FALSE, key->nentries); + pos->hasMatchKey[i] = (key->nentries == 0); } + /* + * Outer loop iterates over multiple pending-list pages when a single + * heap row has entries spanning those pages. + */ for (;;) { Datum datum[BLCKSZ / sizeof(IndexTupleData)]; + GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)]; bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)]; Assert(pos->lastOffset > pos->firstOffset); ! memset(datumExtracted + pos->firstOffset - 1, 0, ! sizeof(bool) * (pos->lastOffset - pos->firstOffset)); page = BufferGetPage(pos->pendingBuffer); *************** collectDatumForItem(IndexScanDesc scan, *** 1037,1164 **** for (j = 0; j < key->nentries; j++) { OffsetNumber StopLow = pos->firstOffset, StopHigh = pos->lastOffset, StopMiddle; - GinScanEntry entry = key->scanEntry + j; ! /* already true - do not extra work */ if (key->entryRes[j]) continue; /* ! * Interested tuples are from pos->firstOffset to * pos->lastOffset and they are ordered by (attnum, Datum) as ! * it's done in entry tree So we could use binary search to ! * prevent linear scanning */ while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); attrnum = gintuple_get_attrnum(&so->ginstate, itup); if (key->attnum < attrnum) StopHigh = StopMiddle; ! else if (key->attnum > attrnum) StopLow = StopMiddle + 1; ! else { ! int res; ! if (datumExtracted[StopMiddle - 1] == false) ! { ! datum[StopMiddle - 1] = gin_index_getattr(&so->ginstate, itup); ! datumExtracted[StopMiddle - 1] = true; ! } ! res = ginCompareEntries(&so->ginstate, ! entry->attnum, ! entry->entry, ! datum[StopMiddle - 1]); ! if (res == 0) ! { ! /* ! * The exact match causes, so we just scan from ! * current position to find a partial match. See ! * comment above about tuple's ordering. ! */ ! if (entry->isPartialMatch) ! key->entryRes[j] = ! matchPartialInPendingList(&so->ginstate, ! page, StopMiddle, ! pos->lastOffset, ! entry->entry, ! entry->attnum, ! datum, ! datumExtracted, ! entry->strategy, ! entry->extra_data); ! else ! key->entryRes[j] = true; ! break; ! } ! else if (res < 0) ! StopHigh = StopMiddle; else ! StopLow = StopMiddle + 1; } } if (StopLow >= StopHigh && entry->isPartialMatch) { /* ! * The exact match wasn't found, so we need to start scan ! * from first tuple greater then current entry See comment ! * above about tuple's ordering. */ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, ! page, StopHigh, pos->lastOffset, ! entry->entry, ! entry->attnum, datum, ! datumExtracted, ! entry->strategy, ! entry->extra_data); } pos->hasMatchKey[i] |= key->entryRes[j]; } } pos->firstOffset = pos->lastOffset; if (GinPageHasFullRow(page)) { /* ! * We scan all values from one tuple, go to next one */ ! ! return hasAllMatchingKeys(so, pos); } else { - ItemPointerData item = pos->item; - /* ! * need to get next portion of tuples of row containing on several ! * pages */ ! if (scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item)) ! elog(ERROR, "Could not process tuple"); /* XXX should not be ! * here ! */ } } ! return hasAllMatchingKeys(so, pos); } /* ! * Collect all matched rows from pending list in bitmap */ static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) --- 1409,1563 ---- for (j = 0; j < key->nentries; j++) { + GinScanEntry entry = key->scanEntry + j; OffsetNumber StopLow = pos->firstOffset, StopHigh = pos->lastOffset, StopMiddle; ! /* If already matched on earlier page, do no extra work */ if (key->entryRes[j]) continue; /* ! * Interesting tuples are from pos->firstOffset to * pos->lastOffset and they are ordered by (attnum, Datum) as ! * it's done in entry tree. So we can use binary search to ! * avoid linear scanning. */ while (StopLow < StopHigh) { + int res; + StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle)); + attrnum = gintuple_get_attrnum(&so->ginstate, itup); if (key->attnum < attrnum) + { StopHigh = StopMiddle; ! continue; ! } ! if (key->attnum > attrnum) ! { StopLow = StopMiddle + 1; ! continue; ! } ! ! if (datumExtracted[StopMiddle - 1] == false) { ! datum[StopMiddle - 1] = ! gintuple_get_key(&so->ginstate, itup, ! &category[StopMiddle - 1]); ! datumExtracted[StopMiddle - 1] = true; ! } ! res = ginCompareEntries(&so->ginstate, ! entry->parent->attnum, ! entry->queryKey, ! entry->queryCategory, ! datum[StopMiddle - 1], ! category[StopMiddle - 1]); ! if (res == 0) ! { ! /* ! * Found exact match (there can be only one). ! * ! * If doing partial match, scan forward from ! * here to end of page to check for matches. ! * ! * See comment above about tuple's ordering. ! */ ! if (entry->isPartialMatch) ! key->entryRes[j] = ! matchPartialInPendingList(&so->ginstate, ! page, ! StopMiddle, ! pos->lastOffset, ! entry, ! datum, ! category, ! datumExtracted); else ! key->entryRes[j] = true; ! ! /* done with binary search */ ! break; } + else if (res < 0) + StopHigh = StopMiddle; + else + StopLow = StopMiddle + 1; } if (StopLow >= StopHigh && entry->isPartialMatch) { /* ! * No exact match on this page. If doing partial ! * match, scan from the first tuple greater than ! * target value to end of page. Note that since we ! * don't remember whether the comparePartialFn told us ! * to stop early on a previous page, we will uselessly ! * apply comparePartialFn to the first tuple on each ! * subsequent page. */ key->entryRes[j] = matchPartialInPendingList(&so->ginstate, ! page, ! StopHigh, pos->lastOffset, ! entry, datum, ! category, ! datumExtracted); } pos->hasMatchKey[i] |= key->entryRes[j]; } } + /* Advance firstOffset over the scanned tuples */ pos->firstOffset = pos->lastOffset; if (GinPageHasFullRow(page)) { /* ! * We have examined all pending entries for the current heap row. ! * Break out of loop over pages. */ ! break; } else { /* ! * Advance to next page of pending entries for the current heap ! * row. Complain if there isn't one. */ + ItemPointerData item = pos->item; ! if (scanGetCandidate(scan, pos) == false || ! !ItemPointerEquals(&pos->item, &item)) ! elog(ERROR, "could not find additional pending pages for same heap tuple"); } } ! /* ! * Now return "true" if all scan keys have at least one matching datum ! * (ignoring full-scan keys) ! */ ! for (i = 0; i < so->nkeys; i++) ! { ! if (pos->hasMatchKey[i] == false) ! return false; ! } ! ! return true; } /* ! * Collect all matched rows from pending list into bitmap */ static void scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) *************** scanPendingInsert(IndexScanDesc scan, TI *** 1201,1211 **** while (scanGetCandidate(scan, &pos)) { /* ! * Check entries in tuple and setup entryRes array If tuples of heap's ! * row are placed on several pages collectDatumForItem will read all ! * of that pages. */ ! if (!collectDatumForItem(scan, &pos)) continue; /* --- 1600,1612 ---- while (scanGetCandidate(scan, &pos)) { /* ! * Check entries in tuple and set up entryRes array. ! * ! * If pending tuples belonging to the current heap row are spread ! * across several pages, collectMatchesForHeapRow will read all of ! * those pages. */ ! if (!collectMatchesForHeapRow(scan, &pos)) continue; /* *************** scanPendingInsert(IndexScanDesc scan, TI *** 1241,1364 **** pfree(pos.hasMatchKey); } - /* - * Get next heap item pointer (after advancePast) from scan. - * Returns true if anything found. - * On success, *item and *recheck are set. - * - * Note: this is very nearly the same logic as in keyGetItem(), except - * that we know the keys are to be combined with AND logic, whereas in - * keyGetItem() the combination logic is known only to the consistentFn. - */ - static bool - scanGetItem(IndexScanDesc scan, ItemPointer advancePast, - ItemPointerData *item, bool *recheck) - { - GinScanOpaque so = (GinScanOpaque) scan->opaque; - ItemPointerData myAdvancePast = *advancePast; - uint32 i; - bool match; - - for (;;) - { - /* - * Advance any keys that are <= myAdvancePast. In particular, - * since key->curItem was initialized with ItemPointerSetMin, this - * ensures we fetch the first item for each key on the first call. - * Then set *item to the minimum of the key curItems. - * - * Note: a lossy-page entry is encoded by a ItemPointer with max value - * for offset (0xffff), so that it will sort after any exact entries - * for the same page. So we'll prefer to return exact pointers not - * lossy pointers, which is good. Also, when we advance past an exact - * entry after processing it, we will not advance past lossy entries - * for the same page in other keys, which is NECESSARY for correct - * results (since we might have additional entries for the same page - * in the first key). - */ - ItemPointerSetMax(item); - - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - while (key->isFinished == FALSE && - ginCompareItemPointers(&key->curItem, &myAdvancePast) <= 0) - keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, - key, &myAdvancePast); - - if (key->isFinished) - return FALSE; /* finished one of keys */ - - if (ginCompareItemPointers(&key->curItem, item) < 0) - *item = key->curItem; - } - - Assert(!ItemPointerIsMax(item)); - - /*---------- - * Now *item contains first ItemPointer after previous result. - * - * The item is a valid hit only if all the keys returned either - * that exact TID, or a lossy reference to the same page. - * - * This logic works only if a keyGetItem stream can never contain both - * exact and lossy pointers for the same page. Else we could have a - * case like - * - * stream 1 stream 2 - * ... ... - * 42/6 42/7 - * 50/1 42/0xffff - * ... ... - * - * We would conclude that 42/6 is not a match and advance stream 1, - * thus never detecting the match to the lossy pointer in stream 2. - * (keyGetItem has a similar problem versus entryGetItem.) - *---------- - */ - match = true; - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - if (ginCompareItemPointers(item, &key->curItem) == 0) - continue; - if (ItemPointerIsLossyPage(&key->curItem) && - GinItemPointerGetBlockNumber(&key->curItem) == - GinItemPointerGetBlockNumber(item)) - continue; - match = false; - break; - } - - if (match) - break; - - /* - * No hit. Update myAdvancePast to this TID, so that on the next - * pass we'll move to the next possible entry. - */ - myAdvancePast = *item; - } - - /* - * We must return recheck = true if any of the keys are marked recheck. - */ - *recheck = false; - for (i = 0; i < so->nkeys; i++) - { - GinScanKey key = so->keys + i; - - if (key->recheckCurItem) - { - *recheck = true; - break; - } - } - - return TRUE; - } #define GinIsNewKey(s) ( ((GinScanOpaque) scan->opaque)->keys == NULL ) #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes ) --- 1642,1647 ---- *************** gingetbitmap(PG_FUNCTION_ARGS) *** 1372,1377 **** --- 1655,1663 ---- ItemPointerData iptr; bool recheck; + /* + * Set up the scan keys, and check for unsatisfiable query. + */ if (GinIsNewKey(scan)) ginNewScanKey(scan); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 5b146d6c265c6d54037fdbf0869d20c101fe2073..7ce9d21a606396042296cd27832d8cc31e353841 100644 *** a/src/backend/access/gin/gininsert.c --- b/src/backend/access/gin/gininsert.c *************** typedef struct *** 35,42 **** } GinBuildState; /* ! * Creates posting tree with one page. Function ! * suppose that items[] fits to page */ static BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) --- 35,44 ---- } GinBuildState; /* ! * Creates new posting tree with one page, containing the given TIDs. ! * Returns the page number (which will be the root of this posting tree). ! * ! * items[] must be in sorted order with no duplicates. */ static BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems) *************** createPostingTree(Relation index, ItemPo *** 45,50 **** --- 47,55 ---- Buffer buffer = GinNewBuffer(index); Page page; + /* Assert that the items[] array will fit on one page */ + Assert(nitems <= GinMaxLeafDataItems); + START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_DATA | GIN_LEAF); *************** createPostingTree(Relation index, ItemPo *** 76,87 **** rdata[1].len = sizeof(ItemPointerData) * nitems; rdata[1].next = NULL; - - recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata); PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); - } UnlockReleaseBuffer(buffer); --- 81,89 ---- *************** createPostingTree(Relation index, ItemPo *** 93,120 **** /* ! * Adds array of item pointers to tuple's posting list or ! * creates posting tree and tuple pointed to tree in a case * of not enough space. Max size of tuple is defined in ! * GinFormTuple(). */ static IndexTuple ! addItemPointersToTuple(Relation index, GinState *ginstate, ! GinBtreeStack *stack, IndexTuple old, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats) { ! Datum key = gin_index_getattr(ginstate, old); ! OffsetNumber attnum = gintuple_get_attrnum(ginstate, old); ! IndexTuple res = GinFormTuple(index, ginstate, attnum, key, ! NULL, nitem + GinGetNPosting(old), ! false); if (res) { /* good, small enough */ uint32 newnitem; newnitem = ginMergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), --- 95,133 ---- /* ! * Adds array of item pointers to tuple's posting list, or ! * creates posting tree and tuple pointing to tree in case * of not enough space. Max size of tuple is defined in ! * GinFormTuple(). Returns a new, modified index tuple. ! * items[] must be in sorted order with no duplicates. */ static IndexTuple ! addItemPointersToLeafTuple(GinState *ginstate, ! IndexTuple old, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats) { ! OffsetNumber attnum; ! Datum key; ! GinNullCategory category; ! IndexTuple res; ! ! Assert(!GinIsPostingTree(old)); ! ! attnum = gintuple_get_attrnum(ginstate, old); ! key = gintuple_get_key(ginstate, old, &category); ! ! /* try to build tuple with room for all the items */ ! res = GinFormTuple(ginstate, attnum, key, category, ! NULL, nitem + GinGetNPosting(old), ! false); if (res) { /* good, small enough */ uint32 newnitem; + /* fill in the posting list with union of old and new TIDs */ newnitem = ginMergeItemPointers(GinGetPosting(res), GinGetPosting(old), GinGetNPosting(old), *************** addItemPointersToTuple(Relation index, G *** 124,161 **** } else { BlockNumber postingRoot; GinPostingTreeScan *gdi; ! /* posting list becomes big, so we need to make posting's tree */ ! res = GinFormTuple(index, ginstate, attnum, key, NULL, 0, true); ! postingRoot = createPostingTree(index, GinGetPosting(old), GinGetNPosting(old)); ! GinSetPostingTree(res, postingRoot); ! gdi = ginPrepareScanPostingTree(index, postingRoot, FALSE); gdi->btree.isBuild = (buildStats != NULL); ! ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); /* During index build, count the newly-added data page */ if (buildStats) buildStats->nDataPages++; } return res; } /* ! * Inserts only one entry to the index, but it can add more than 1 ItemPointer. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ! ginEntryInsert(Relation index, GinState *ginstate, ! OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { --- 137,251 ---- } else { + /* posting list would be too big, convert to posting tree */ BlockNumber postingRoot; GinPostingTreeScan *gdi; ! /* ! * Initialize posting tree with the old tuple's posting list. It's ! * surely small enough to fit on one posting-tree page, and should ! * already be in order with no duplicates. ! */ ! postingRoot = createPostingTree(ginstate->index, ! GinGetPosting(old), ! GinGetNPosting(old)); ! /* During index build, count the newly-added data page */ ! if (buildStats) ! buildStats->nDataPages++; ! ! /* Now insert the TIDs-to-be-added into the posting tree */ ! gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE); gdi->btree.isBuild = (buildStats != NULL); ! ginInsertItemPointers(gdi, items, nitem, buildStats); pfree(gdi); + /* And build a new posting-tree-only result tuple */ + res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true); + GinSetPostingTree(res, postingRoot); + } + + return res; + } + + /* + * Build a fresh leaf tuple, either posting-list or posting-tree format + * depending on whether the given items list will fit. + * items[] must be in sorted order with no duplicates. + * + * This is basically the same logic as in addItemPointersToLeafTuple, + * but working from slightly different input. + */ + static IndexTuple + buildFreshLeafTuple(GinState *ginstate, + OffsetNumber attnum, Datum key, GinNullCategory category, + ItemPointerData *items, uint32 nitem, + GinStatsData *buildStats) + { + IndexTuple res; + + /* try to build tuple with room for all the items */ + res = GinFormTuple(ginstate, attnum, key, category, + items, nitem, false); + + if (!res) + { + /* posting list would be too big, build posting tree */ + BlockNumber postingRoot; + + /* + * Build posting-tree-only result tuple. We do this first so as + * to fail quickly if the key is too big. + */ + res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, true); + + /* + * Initialize posting tree with as many TIDs as will fit on the + * first page. + */ + postingRoot = createPostingTree(ginstate->index, + items, + Min(nitem, GinMaxLeafDataItems)); + /* During index build, count the newly-added data page */ if (buildStats) buildStats->nDataPages++; + + /* Add any remaining TIDs to the posting tree */ + if (nitem > GinMaxLeafDataItems) + { + GinPostingTreeScan *gdi; + + gdi = ginPrepareScanPostingTree(ginstate->index, postingRoot, FALSE); + gdi->btree.isBuild = (buildStats != NULL); + + ginInsertItemPointers(gdi, + items + GinMaxLeafDataItems, + nitem - GinMaxLeafDataItems, + buildStats); + + pfree(gdi); + } + + /* And save the root link in the result tuple */ + GinSetPostingTree(res, postingRoot); } return res; } /* ! * Insert one or more heap TIDs associated with the given key value. ! * This will either add a single key entry, or enlarge a pre-existing entry. * * During an index build, buildStats is non-null and the counters * it contains should be incremented as needed. */ void ! ginEntryInsert(GinState *ginstate, ! OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats) { *************** ginEntryInsert(Relation index, GinState *** 168,252 **** if (buildStats) buildStats->nEntries++; ! ginPrepareEntryScan(&btree, index, attnum, value, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { ! /* found entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { ! /* lock root of posting tree */ ! GinPostingTreeScan *gdi; BlockNumber rootPostingTree = GinGetPostingTree(itup); /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ ! gdi = ginPrepareScanPostingTree(index, rootPostingTree, FALSE); gdi->btree.isBuild = (buildStats != NULL); ! ginInsertItemPointer(gdi, items, nitem, buildStats); pfree(gdi); return; } ! itup = addItemPointersToTuple(index, ginstate, stack, itup, ! items, nitem, buildStats); btree.isDelete = TRUE; } else { ! /* We suppose that tuple can store at least one itempointer */ ! itup = GinFormTuple(index, ginstate, attnum, value, items, 1, true); ! ! if (nitem > 1) ! { ! /* Add the rest, making a posting tree if necessary */ ! IndexTuple previtup = itup; ! ! itup = addItemPointersToTuple(index, ginstate, stack, previtup, ! items + 1, nitem - 1, buildStats); ! pfree(previtup); ! } } btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); } /* ! * Saves indexed value in memory accumulator during index creation ! * Function isn't used during normal insert */ ! static uint32 ! ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, Datum value, ItemPointer heapptr) { Datum *entries; int32 nentries; MemoryContext oldCtx; oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); ! entries = ginExtractEntriesSU(buildstate->accum.ginstate, attnum, value, &nentries); MemoryContextSwitchTo(oldCtx); ! if (nentries == 0) ! /* nothing to insert */ ! return 0; ! ginInsertRecordBA(&buildstate->accum, heapptr, attnum, entries, nentries); MemoryContextReset(buildstate->funcCtx); - - return nentries; } static void --- 258,339 ---- if (buildStats) buildStats->nEntries++; ! ginPrepareEntryScan(&btree, attnum, key, category, ginstate); stack = ginFindLeafPage(&btree, NULL); page = BufferGetPage(stack->buffer); if (btree.findItem(&btree, stack)) { ! /* found pre-existing entry */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); if (GinIsPostingTree(itup)) { ! /* add entries to existing posting tree */ BlockNumber rootPostingTree = GinGetPostingTree(itup); + GinPostingTreeScan *gdi; /* release all stack */ LockBuffer(stack->buffer, GIN_UNLOCK); freeGinBtreeStack(stack); /* insert into posting tree */ ! gdi = ginPrepareScanPostingTree(ginstate->index, rootPostingTree, FALSE); gdi->btree.isBuild = (buildStats != NULL); ! ginInsertItemPointers(gdi, items, nitem, buildStats); pfree(gdi); return; } ! /* modify an existing leaf entry */ ! itup = addItemPointersToLeafTuple(ginstate, itup, ! items, nitem, buildStats); btree.isDelete = TRUE; } else { ! /* no match, so construct a new leaf entry */ ! itup = buildFreshLeafTuple(ginstate, attnum, key, category, ! items, nitem, buildStats); } + /* Insert the new or modified leaf tuple */ btree.entry = itup; ginInsertValue(&btree, stack, buildStats); pfree(itup); } /* ! * Extract index entries for a single indexable item, and add them to the ! * BuildAccumulator's state. ! * ! * This function is used only during initial index creation. */ ! static void ! ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, ! Datum value, bool isNull, ! ItemPointer heapptr) { Datum *entries; + GinNullCategory *categories; int32 nentries; MemoryContext oldCtx; oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); ! entries = ginExtractEntries(buildstate->accum.ginstate, attnum, ! value, isNull, ! &nentries, &categories); MemoryContextSwitchTo(oldCtx); ! ginInsertBAEntries(&buildstate->accum, heapptr, attnum, ! entries, categories, nentries); ! buildstate->indtuples += nentries; MemoryContextReset(buildstate->funcCtx); } static void *************** ginBuildCallback(Relation index, HeapTup *** 260,284 **** oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) ! if (!isnull[i]) ! buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, ! (OffsetNumber) (i + 1), values[i], ! &htup->t_self); /* If we've maxed out our available memory, dump everything to the index */ if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L) { ItemPointerData *list; ! Datum entry; uint32 nlist; OffsetNumber attnum; ginBeginBAScan(&buildstate->accum); ! while ((list = ginGetEntry(&buildstate->accum, &attnum, &entry, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ! ginEntryInsert(index, &buildstate->ginstate, attnum, entry, list, nlist, &buildstate->buildStats); } --- 347,372 ---- oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) ! ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), ! values[i], isnull[i], ! &htup->t_self); /* If we've maxed out our available memory, dump everything to the index */ if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L) { ItemPointerData *list; ! Datum key; ! GinNullCategory category; uint32 nlist; OffsetNumber attnum; ginBeginBAScan(&buildstate->accum); ! while ((list = ginGetBAEntry(&buildstate->accum, ! &attnum, &key, &category, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ! ginEntryInsert(&buildstate->ginstate, attnum, key, category, list, nlist, &buildstate->buildStats); } *************** ginbuild(PG_FUNCTION_ARGS) *** 301,307 **** Buffer RootBuffer, MetaBuffer; ItemPointerData *list; ! Datum entry; uint32 nlist; MemoryContext oldCtx; OffsetNumber attnum; --- 389,396 ---- Buffer RootBuffer, MetaBuffer; ItemPointerData *list; ! Datum key; ! GinNullCategory category; uint32 nlist; MemoryContext oldCtx; OffsetNumber attnum; *************** ginbuild(PG_FUNCTION_ARGS) *** 384,394 **** /* dump remaining entries to the index */ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); ginBeginBAScan(&buildstate.accum); ! while ((list = ginGetEntry(&buildstate.accum, &attnum, &entry, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ! ginEntryInsert(index, &buildstate.ginstate, attnum, entry, list, nlist, &buildstate.buildStats); } MemoryContextSwitchTo(oldCtx); --- 473,484 ---- /* dump remaining entries to the index */ oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); ginBeginBAScan(&buildstate.accum); ! while ((list = ginGetBAEntry(&buildstate.accum, ! &attnum, &key, &category, &nlist)) != NULL) { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); ! ginEntryInsert(&buildstate.ginstate, attnum, key, category, list, nlist, &buildstate.buildStats); } MemoryContextSwitchTo(oldCtx); *************** ginbuildempty(PG_FUNCTION_ARGS) *** 454,478 **** } /* ! * Inserts value during normal insertion */ ! static uint32 ! ginHeapTupleInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value, ItemPointer item) { Datum *entries; int32 i, nentries; ! entries = ginExtractEntriesSU(ginstate, attnum, value, &nentries); ! ! if (nentries == 0) ! /* nothing to insert */ ! return 0; for (i = 0; i < nentries; i++) ! ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, NULL); ! ! return nentries; } Datum --- 544,568 ---- } /* ! * Insert index entries for a single indexable item during "normal" ! * (non-fast-update) insertion */ ! static void ! ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, ! Datum value, bool isNull, ! ItemPointer item) { Datum *entries; + GinNullCategory *categories; int32 i, nentries; ! entries = ginExtractEntries(ginstate, attnum, value, isNull, ! &nentries, &categories); for (i = 0; i < nentries; i++) ! ginEntryInsert(ginstate, attnum, entries[i], categories[i], ! item, 1, NULL); } Datum *************** gininsert(PG_FUNCTION_ARGS) *** 507,526 **** GinTupleCollector collector; memset(&collector, 0, sizeof(GinTupleCollector)); for (i = 0; i < ginstate.origTupdesc->natts; i++) ! if (!isnull[i]) ! ginHeapTupleFastCollect(index, &ginstate, &collector, ! (OffsetNumber) (i + 1), values[i], ht_ctid); ! ginHeapTupleFastInsert(index, &ginstate, &collector); } else { for (i = 0; i < ginstate.origTupdesc->natts; i++) ! if (!isnull[i]) ! ginHeapTupleInsert(index, &ginstate, ! (OffsetNumber) (i + 1), values[i], ht_ctid); ! } MemoryContextSwitchTo(oldCtx); --- 597,617 ---- GinTupleCollector collector; memset(&collector, 0, sizeof(GinTupleCollector)); + for (i = 0; i < ginstate.origTupdesc->natts; i++) ! ginHeapTupleFastCollect(&ginstate, &collector, ! (OffsetNumber) (i + 1), ! values[i], isnull[i], ! ht_ctid); ! ginHeapTupleFastInsert(&ginstate, &collector); } else { for (i = 0; i < ginstate.origTupdesc->natts; i++) ! ginHeapTupleInsert(&ginstate, (OffsetNumber) (i + 1), ! values[i], isnull[i], ! ht_ctid); } MemoryContextSwitchTo(oldCtx); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 2a39c4b3834a701ea59b5ac08d3729c2b5c1bf97..31a8015b70a71ddec637c544f4a6dc37e70f5e2b 100644 *** a/src/backend/access/gin/ginscan.c --- b/src/backend/access/gin/ginscan.c *************** *** 1,7 **** /*------------------------------------------------------------------------- * * ginscan.c ! * routines to manage scans inverted index relations * * * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group --- 1,7 ---- /*------------------------------------------------------------------------- * * ginscan.c ! * routines to manage scans of inverted index relations * * * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group *************** ginbeginscan(PG_FUNCTION_ARGS) *** 52,103 **** PG_RETURN_POINTER(scan); } static void ! fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query, ! Datum *entryValues, bool *partial_matches, uint32 nEntryValues, StrategyNumber strategy, Pointer *extra_data) { uint32 i, j; ! key->nentries = nEntryValues; ! key->entryRes = (bool *) palloc0(sizeof(bool) * nEntryValues); ! key->scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * nEntryValues); key->strategy = strategy; key->attnum = attnum; ! key->extra_data = extra_data; ! key->query = query; key->firstCall = TRUE; ItemPointerSetMin(&key->curItem); ! for (i = 0; i < nEntryValues; i++) { ! key->scanEntry[i].pval = key->entryRes + i; ! key->scanEntry[i].entry = entryValues[i]; ! key->scanEntry[i].attnum = attnum; ! key->scanEntry[i].extra_data = (extra_data) ? extra_data[i] : NULL; ! ItemPointerSetMin(&key->scanEntry[i].curItem); ! key->scanEntry[i].isFinished = FALSE; ! key->scanEntry[i].offset = InvalidOffsetNumber; ! key->scanEntry[i].buffer = InvalidBuffer; ! key->scanEntry[i].partialMatch = NULL; ! key->scanEntry[i].partialMatchIterator = NULL; ! key->scanEntry[i].partialMatchResult = NULL; ! key->scanEntry[i].strategy = strategy; ! key->scanEntry[i].list = NULL; ! key->scanEntry[i].nlist = 0; ! key->scanEntry[i].isPartialMatch = (ginstate->canPartialMatch[attnum - 1] && partial_matches) ? partial_matches[i] : false; ! /* link to the equals entry in current scan key */ ! key->scanEntry[i].master = NULL; for (j = 0; j < i; j++) if (ginCompareEntries(ginstate, attnum, ! entryValues[i], entryValues[j]) == 0 && ! key->scanEntry[i].isPartialMatch == key->scanEntry[j].isPartialMatch && ! key->scanEntry[i].strategy == key->scanEntry[j].strategy) { ! key->scanEntry[i].master = key->scanEntry + j; break; } } --- 52,115 ---- PG_RETURN_POINTER(scan); } + /* + * Initialize a GinScanKey using the output from the extractQueryFn + */ static void ! fillScanKey(GinState *ginstate, GinScanKey key, ! OffsetNumber attnum, Datum query, ! Datum *queryValues, GinNullCategory *queryCategories, ! bool *partial_matches, uint32 nQueryValues, StrategyNumber strategy, Pointer *extra_data) { uint32 i, j; ! key->nentries = nQueryValues; ! key->scanEntry = (GinScanEntry) palloc(sizeof(GinScanEntryData) * nQueryValues); ! key->entryRes = (bool *) palloc0(sizeof(bool) * nQueryValues); ! key->query = query; ! key->queryValues = queryValues; ! key->queryCategories = queryCategories; ! key->extra_data = extra_data; key->strategy = strategy; key->attnum = attnum; ! key->firstCall = TRUE; ItemPointerSetMin(&key->curItem); ! for (i = 0; i < nQueryValues; i++) { ! GinScanEntry scanEntry = key->scanEntry + i; ! ! scanEntry->parent = key; ! scanEntry->pval = key->entryRes + i; ! scanEntry->queryKey = queryValues[i]; ! scanEntry->queryCategory = queryCategories[i]; ! scanEntry->isPartialMatch = ! (ginstate->canPartialMatch[attnum - 1] && partial_matches) ? partial_matches[i] : false; + scanEntry->extra_data = (extra_data) ? extra_data[i] : NULL; ! ItemPointerSetMin(&scanEntry->curItem); ! scanEntry->isFinished = FALSE; ! scanEntry->offset = InvalidOffsetNumber; ! scanEntry->buffer = InvalidBuffer; ! scanEntry->list = NULL; ! scanEntry->nlist = 0; ! scanEntry->matchBitmap = NULL; ! scanEntry->matchIterator = NULL; ! scanEntry->matchResult = NULL; ! ! /* link to any preceding identical entry in current scan key */ ! scanEntry->master = NULL; for (j = 0; j < i; j++) if (ginCompareEntries(ginstate, attnum, ! queryValues[i], queryCategories[i], ! queryValues[j], queryCategories[j]) == 0 && ! scanEntry->isPartialMatch == key->scanEntry[j].isPartialMatch) { ! scanEntry->master = key->scanEntry + j; break; } } *************** resetScanKeys(GinScanKey keys, uint32 nk *** 132,140 **** key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].list = NULL; key->scanEntry[j].nlist = 0; ! key->scanEntry[j].partialMatch = NULL; ! key->scanEntry[j].partialMatchIterator = NULL; ! key->scanEntry[j].partialMatchResult = NULL; } } } --- 144,152 ---- key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].list = NULL; key->scanEntry[j].nlist = 0; ! key->scanEntry[j].matchBitmap = NULL; ! key->scanEntry[j].matchIterator = NULL; ! key->scanEntry[j].matchResult = NULL; } } } *************** freeScanKeys(GinScanKey keys, uint32 nke *** 159,168 **** ReleaseBuffer(key->scanEntry[j].buffer); if (key->scanEntry[j].list) pfree(key->scanEntry[j].list); ! if (key->scanEntry[j].partialMatchIterator) ! tbm_end_iterate(key->scanEntry[j].partialMatchIterator); ! if (key->scanEntry[j].partialMatch) ! tbm_free(key->scanEntry[j].partialMatch); } pfree(key->entryRes); --- 171,180 ---- ReleaseBuffer(key->scanEntry[j].buffer); if (key->scanEntry[j].list) pfree(key->scanEntry[j].list); ! if (key->scanEntry[j].matchIterator) ! tbm_end_iterate(key->scanEntry[j].matchIterator); ! if (key->scanEntry[j].matchBitmap) ! tbm_free(key->scanEntry[j].matchBitmap); } pfree(key->entryRes); *************** ginNewScanKey(IndexScanDesc scan) *** 179,205 **** GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; uint32 nkeys = 0; ! if (scan->numberOfKeys < 1) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("GIN indexes do not support whole-index scans"))); ! ! so->keys = (GinScanKey) palloc(scan->numberOfKeys * sizeof(GinScanKeyData)); so->isVoidRes = false; for (i = 0; i < scan->numberOfKeys; i++) { ScanKey skey = &scankey[i]; ! Datum *entryValues; ! int32 nEntryValues = 0; bool *partial_matches = NULL; Pointer *extra_data = NULL; /* ! * Assume, that GIN-indexable operators are strict, so nothing could ! * be found */ if (skey->sk_flags & SK_ISNULL) { --- 191,217 ---- GinScanOpaque so = (GinScanOpaque) scan->opaque; int i; uint32 nkeys = 0; + uint32 nvalues = 0; + bool hasNullQuery = false; ! /* allocate an extra GinScanKeyData in case needed for whole-index scan */ ! so->keys = (GinScanKey) ! palloc((scan->numberOfKeys + 1) * sizeof(GinScanKeyData)); so->isVoidRes = false; for (i = 0; i < scan->numberOfKeys; i++) { ScanKey skey = &scankey[i]; ! Datum *queryValues; ! int32 nQueryValues = 0; bool *partial_matches = NULL; Pointer *extra_data = NULL; + bool *nullFlags = NULL; /* ! * We assume that GIN-indexable operators are strict, so a null ! * query argument means an unmatchable query. */ if (skey->sk_flags & SK_ISNULL) { *************** ginNewScanKey(IndexScanDesc scan) *** 207,221 **** break; } ! entryValues = (Datum *) ! DatumGetPointer(FunctionCall5(&so->ginstate.extractQueryFn[skey->sk_attno - 1], skey->sk_argument, ! PointerGetDatum(&nEntryValues), UInt16GetDatum(skey->sk_strategy), PointerGetDatum(&partial_matches), ! PointerGetDatum(&extra_data))); ! if (nEntryValues < 0) { /* * extractQueryFn signals that nothing can match, so we can just --- 219,235 ---- break; } ! /* OK to call the extractQueryFn */ ! queryValues = (Datum *) ! DatumGetPointer(FunctionCall6(&so->ginstate.extractQueryFn[skey->sk_attno - 1], skey->sk_argument, ! PointerGetDatum(&nQueryValues), UInt16GetDatum(skey->sk_strategy), PointerGetDatum(&partial_matches), ! PointerGetDatum(&extra_data), ! PointerGetDatum(&nullFlags))); ! if (nQueryValues < 0) { /* * extractQueryFn signals that nothing can match, so we can just *************** ginNewScanKey(IndexScanDesc scan) *** 225,252 **** break; } ! if (entryValues == NULL || nEntryValues == 0) { ! /* ! * extractQueryFn signals that everything matches. This would ! * require a full scan, which we can't do, but perhaps there is ! * another scankey that provides a restriction to use. So we keep ! * going and check only at the end. ! */ ! continue; } fillScanKey(&so->ginstate, &(so->keys[nkeys]), skey->sk_attno, skey->sk_argument, ! entryValues, partial_matches, nEntryValues, skey->sk_strategy, extra_data); nkeys++; } ! if (nkeys == 0 && !so->isVoidRes) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("GIN indexes do not support whole-index scans"))); so->nkeys = nkeys; --- 239,320 ---- break; } ! /* Force nQueryValues to zero if NULL is returned */ ! if (queryValues == NULL) ! nQueryValues = 0; ! ! /* ! * If the extractQueryFn didn't create a nullFlags array, create one, ! * assuming that everything's non-null. Otherwise, run through the ! * array and make sure each value is exactly 0 or 1; this ensures ! * binary compatibility with the GinNullCategory representation. ! * While at it, detect whether any null keys are present. ! */ ! if (nullFlags == NULL) ! nullFlags = (bool *) palloc0(nQueryValues * sizeof(bool)); ! else { ! int32 j; ! ! for (j = 0; j < nQueryValues; j++) ! { ! if (nullFlags[j]) ! { ! nullFlags[j] = true; /* not any other nonzero value */ ! hasNullQuery = true; ! } ! } } + /* now we can use the nullFlags as category codes */ fillScanKey(&so->ginstate, &(so->keys[nkeys]), skey->sk_attno, skey->sk_argument, ! queryValues, (GinNullCategory *) nullFlags, ! partial_matches, nQueryValues, skey->sk_strategy, extra_data); nkeys++; + nvalues += nQueryValues; } ! /* ! * If none of the scan keys gave rise to values that we can search for, ! * we have to do a full-index scan. Generate a dummy scan key to drive ! * the scan. The scan key has one value that's an EMPTY_QUERY null. ! */ ! if (nvalues == 0 && !so->isVoidRes) ! { ! Datum *values = (Datum *) palloc0(sizeof(Datum)); ! GinNullCategory *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); ! ! categories[0] = GIN_CAT_EMPTY_QUERY; ! ! hasNullQuery = true; ! ! fillScanKey(&so->ginstate, &(so->keys[nkeys]), ! FirstOffsetNumber, (Datum) 0, ! values, categories, ! NULL, 1, ! InvalidStrategy, NULL); ! nkeys++; ! } ! ! /* ! * If the index is version 0, it may be missing null and placeholder ! * entries, which would render searches for nulls and full-index scans ! * unreliable. Throw an error if so. ! */ ! if (hasNullQuery) ! { ! GinStatsData ginStats; ! ! ginGetStats(scan->indexRelation, &ginStats); ! if (ginStats.ginVersion < 1) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("old GIN indexes do not support whole-index scans nor searches for nulls"), ! errhint("To fix this, do REINDEX INDEX \"%s\".", ! RelationGetRelationName(scan->indexRelation)))); ! } so->nkeys = nkeys; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 4674606f7989eb03e457e82f955961bbc77698b3..c7d26c80607a674d9ace5c6c3d47061c7f7076ff 100644 *** a/src/backend/access/gin/ginutil.c --- b/src/backend/access/gin/ginutil.c *************** *** 24,49 **** #include "storage/indexfsm.h" #include "storage/lmgr.h" void initGinState(GinState *state, Relation index) { int i; ! state->origTupdesc = index->rd_att; ! state->oneCol = (index->rd_att->natts == 1) ? true : false; ! for (i = 0; i < index->rd_att->natts; i++) { ! state->tupdesc[i] = CreateTemplateTupleDesc(2, false); ! TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL, ! INT2OID, -1, 0); ! TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL, ! index->rd_att->attrs[i]->atttypid, ! index->rd_att->attrs[i]->atttypmod, ! index->rd_att->attrs[i]->attndims ! ); fmgr_info_copy(&(state->compareFn[i]), index_getprocinfo(index, i + 1, GIN_COMPARE_PROC), --- 24,62 ---- #include "storage/indexfsm.h" #include "storage/lmgr.h" + + /* + * initGinState: fill in an empty GinState struct to describe the index + * + * Note: assorted subsidiary data is allocated in the CurrentMemoryContext. + */ void initGinState(GinState *state, Relation index) { + TupleDesc origTupdesc = RelationGetDescr(index); int i; ! MemSet(state, 0, sizeof(GinState)); ! state->index = index; ! state->oneCol = (origTupdesc->natts == 1) ? true : false; ! state->origTupdesc = origTupdesc; ! for (i = 0; i < origTupdesc->natts; i++) { ! if (state->oneCol) ! state->tupdesc[i] = state->origTupdesc; ! else ! { ! state->tupdesc[i] = CreateTemplateTupleDesc(2, false); ! TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL, ! INT2OID, -1, 0); ! TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL, ! origTupdesc->attrs[i]->atttypid, ! origTupdesc->attrs[i]->atttypmod, ! origTupdesc->attrs[i]->attndims); ! } fmgr_info_copy(&(state->compareFn[i]), index_getprocinfo(index, i + 1, GIN_COMPARE_PROC), *************** initGinState(GinState *state, Relation i *** 82,90 **** OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple) { ! OffsetNumber colN = FirstOffsetNumber; ! if (!ginstate->oneCol) { Datum res; bool isnull; --- 95,108 ---- OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple) { ! OffsetNumber colN; ! if (ginstate->oneCol) ! { ! /* column number is not stored explicitly */ ! colN = FirstOffsetNumber; ! } ! else { Datum res; bool isnull; *************** gintuple_get_attrnum(GinState *ginstate, *** 105,117 **** } /* ! * Extract stored datum from GIN tuple */ Datum ! gin_index_getattr(GinState *ginstate, IndexTuple tuple) { - bool isnull; Datum res; if (ginstate->oneCol) { --- 123,136 ---- } /* ! * Extract stored datum (and possible null category) from GIN tuple */ Datum ! gintuple_get_key(GinState *ginstate, IndexTuple tuple, ! GinNullCategory *category) { Datum res; + bool isnull; if (ginstate->oneCol) { *************** gin_index_getattr(GinState *ginstate, In *** 134,140 **** &isnull); } ! Assert(!isnull); return res; } --- 153,162 ---- &isnull); } ! if (isnull) ! *category = GinGetNullCategory(tuple, ginstate); ! else ! *category = GIN_CAT_NORM_KEY; return res; } *************** gin_index_getattr(GinState *ginstate, In *** 144,150 **** * The returned buffer is already pinned and exclusive-locked * Caller is responsible for initializing the page by calling GinInitBuffer */ - Buffer GinNewBuffer(Relation index) { --- 166,171 ---- *************** GinInitMetabuffer(Buffer b) *** 233,328 **** metadata->nEntryPages = 0; metadata->nDataPages = 0; metadata->nEntries = 0; } int ! ginCompareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b) { return DatumGetInt32(FunctionCall2(&ginstate->compareFn[attnum - 1], a, b)); } int ! ginCompareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a, ! OffsetNumber attnum_b, Datum b) { ! if (attnum_a == attnum_b) ! return ginCompareEntries(ginstate, attnum_a, a, b); ! return (attnum_a < attnum_b) ? -1 : 1; } typedef struct { FmgrInfo *cmpDatumFunc; ! bool *needUnique; ! } cmpEntriesData; static int ! cmpEntries(const Datum *a, const Datum *b, cmpEntriesData *arg) { ! int res = DatumGetInt32(FunctionCall2(arg->cmpDatumFunc, ! *a, *b)); if (res == 0) ! *(arg->needUnique) = TRUE; return res; } Datum * ! ginExtractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value, int32 *nentries, ! bool *needUnique) { Datum *entries; ! entries = (Datum *) DatumGetPointer(FunctionCall2( ! &ginstate->extractValueFn[attnum - 1], ! value, ! PointerGetDatum(nentries) ! )); ! if (entries == NULL) ! *nentries = 0; ! *needUnique = FALSE; ! if (*nentries > 1) { ! cmpEntriesData arg; ! ! arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1]; ! arg.needUnique = needUnique; ! qsort_arg(entries, *nentries, sizeof(Datum), ! (qsort_arg_comparator) cmpEntries, (void *) &arg); } ! return entries; ! } ! Datum * ! ginExtractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum value, int32 *nentries) ! { ! bool needUnique; ! Datum *entries = ginExtractEntriesS(ginstate, attnum, value, nentries, ! &needUnique); ! if (needUnique) ! { ! Datum *ptr, ! *res; ! ptr = res = entries; ! while (ptr - entries < *nentries) { ! if (ginCompareEntries(ginstate, attnum, *ptr, *res) != 0) ! *(++res) = *ptr++; ! else ! ptr++; } ! *nentries = res + 1 - entries; } return entries; --- 254,471 ---- metadata->nEntryPages = 0; metadata->nDataPages = 0; metadata->nEntries = 0; + metadata->ginVersion = GIN_CURRENT_VERSION; } + /* + * Compare two keys of the same index column + */ int ! ginCompareEntries(GinState *ginstate, OffsetNumber attnum, ! Datum a, GinNullCategory categorya, ! Datum b, GinNullCategory categoryb) { + /* if not of same null category, sort by that first */ + if (categorya != categoryb) + return (categorya < categoryb) ? -1 : 1; + + /* all null items in same category are equal */ + if (categorya != GIN_CAT_NORM_KEY) + return 0; + + /* both not null, so safe to call the compareFn */ return DatumGetInt32(FunctionCall2(&ginstate->compareFn[attnum - 1], a, b)); } + /* + * Compare two keys of possibly different index columns + */ int ! ginCompareAttEntries(GinState *ginstate, ! OffsetNumber attnuma, Datum a, GinNullCategory categorya, ! OffsetNumber attnumb, Datum b, GinNullCategory categoryb) { ! /* attribute number is the first sort key */ ! if (attnuma != attnumb) ! return (attnuma < attnumb) ? -1 : 1; ! return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb); } + + /* + * Support for sorting key datums in ginExtractEntries + * + * Note: we only have to worry about null and not-null keys here; + * ginExtractEntries never generates more than one placeholder null, + * so it doesn't have to sort those. + */ + typedef struct + { + Datum datum; + bool isnull; + } keyEntryData; + typedef struct { FmgrInfo *cmpDatumFunc; ! bool haveDups; ! } cmpEntriesArg; static int ! cmpEntries(const void *a, const void *b, void *arg) { ! const keyEntryData *aa = (const keyEntryData *) a; ! const keyEntryData *bb = (const keyEntryData *) b; ! cmpEntriesArg *data = (cmpEntriesArg *) arg; ! int res; + if (aa->isnull) + { + if (bb->isnull) + res = 0; /* NULL "=" NULL */ + else + res = 1; /* NULL ">" not-NULL */ + } + else if (bb->isnull) + res = -1; /* not-NULL "<" NULL */ + else + res = DatumGetInt32(FunctionCall2(data->cmpDatumFunc, + aa->datum, bb->datum)); + + /* + * Detect if we have any duplicates. If there are equal keys, qsort + * must compare them at some point, else it wouldn't know whether one + * should go before or after the other. + */ if (res == 0) ! data->haveDups = true; return res; } + + /* + * Extract the index key values from an indexable item + * + * The resulting key values are sorted, and any duplicates are removed. + * This avoids generating redundant index entries. + */ Datum * ! ginExtractEntries(GinState *ginstate, OffsetNumber attnum, ! Datum value, bool isNull, ! int32 *nentries, GinNullCategory **categories) { Datum *entries; + bool *nullFlags; + int32 i; ! /* ! * We don't call the extractValueFn on a null item. Instead generate a ! * placeholder. ! */ ! if (isNull) ! { ! *nentries = 1; ! entries = (Datum *) palloc(sizeof(Datum)); ! entries[0] = (Datum) 0; ! *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); ! (*categories)[0] = GIN_CAT_NULL_ITEM; ! return entries; ! } ! /* OK, call the opclass's extractValueFn */ ! nullFlags = NULL; /* in case extractValue doesn't set it */ ! entries = (Datum *) ! DatumGetPointer(FunctionCall3(&ginstate->extractValueFn[attnum - 1], ! value, ! PointerGetDatum(nentries), ! PointerGetDatum(&nullFlags))); ! /* ! * Generate a placeholder if the item contained no keys. ! */ ! if (entries == NULL || *nentries <= 0) { ! *nentries = 1; ! entries = (Datum *) palloc(sizeof(Datum)); ! entries[0] = (Datum) 0; ! *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory)); ! (*categories)[0] = GIN_CAT_EMPTY_ITEM; ! return entries; } ! /* ! * If the extractValueFn didn't create a nullFlags array, create one, ! * assuming that everything's non-null. Otherwise, run through the ! * array and make sure each value is exactly 0 or 1; this ensures ! * binary compatibility with the GinNullCategory representation. ! */ ! if (nullFlags == NULL) ! nullFlags = (bool *) palloc0(*nentries * sizeof(bool)); ! else ! { ! for (i = 0; i < *nentries; i++) ! nullFlags[i] = (nullFlags[i] ? true : false); ! } ! /* now we can use the nullFlags as category codes */ ! *categories = (GinNullCategory *) nullFlags; + /* + * If there's more than one key, sort and unique-ify. + * + * XXX Using qsort here is notationally painful, and the overhead is + * pretty bad too. For small numbers of keys it'd likely be better to + * use a simple insertion sort. + */ + if (*nentries > 1) + { + keyEntryData *keydata; + cmpEntriesArg arg; ! keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData)); ! for (i = 0; i < *nentries; i++) ! { ! keydata[i].datum = entries[i]; ! keydata[i].isnull = nullFlags[i]; ! } ! arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1]; ! arg.haveDups = false; ! qsort_arg(keydata, *nentries, sizeof(keyEntryData), ! cmpEntries, (void *) &arg); ! if (arg.haveDups) ! { ! /* there are duplicates, must get rid of 'em */ ! int32 j; ! entries[0] = keydata[0].datum; ! nullFlags[0] = keydata[0].isnull; ! j = 1; ! for (i = 1; i < *nentries; i++) ! { ! if (cmpEntries(&keydata[i-1], &keydata[i], &arg) != 0) ! { ! entries[j] = keydata[i].datum; ! nullFlags[j] = keydata[i].isnull; ! j++; ! } ! } ! *nentries = j; ! } ! else { ! /* easy, no duplicates */ ! for (i = 0; i < *nentries; i++) ! { ! entries[i] = keydata[i].datum; ! nullFlags[i] = keydata[i].isnull; ! } } ! pfree(keydata); } return entries; *************** ginoptions(PG_FUNCTION_ARGS) *** 361,367 **** * Fetch index's statistical data into *stats * * Note: in the result, nPendingPages can be trusted to be up-to-date, ! * but the other fields are as of the last VACUUM. */ void ginGetStats(Relation index, GinStatsData *stats) --- 504,510 ---- * Fetch index's statistical data into *stats * * Note: in the result, nPendingPages can be trusted to be up-to-date, ! * as can ginVersion; but the other fields are as of the last VACUUM. */ void ginGetStats(Relation index, GinStatsData *stats) *************** ginGetStats(Relation index, GinStatsData *** 380,385 **** --- 523,529 ---- stats->nEntryPages = metadata->nEntryPages; stats->nDataPages = metadata->nDataPages; stats->nEntries = metadata->nEntries; + stats->ginVersion = metadata->ginVersion; UnlockReleaseBuffer(metabuffer); } *************** ginGetStats(Relation index, GinStatsData *** 387,393 **** /* * Write the given statistics to the index's metapage * ! * Note: nPendingPages is *not* copied over */ void ginUpdateStats(Relation index, const GinStatsData *stats) --- 531,537 ---- /* * Write the given statistics to the index's metapage * ! * Note: nPendingPages and ginVersion are *not* copied over */ void ginUpdateStats(Relation index, const GinStatsData *stats) diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 3054030a2c45dda587deb420dfe52829a361eac5..5ed42eab43618c617f7cc165ddc06896d23b55de 100644 *** a/src/backend/access/gin/ginvacuum.c --- b/src/backend/access/gin/ginvacuum.c *************** ginVacuumPostingTreeLeaves(GinVacuumStat *** 190,196 **** /* saves changes about deleted tuple ... */ if (oldMaxOff != newMaxOff) { - START_CRIT_SECTION(); if (newMaxOff > 0) --- 190,195 ---- *************** ginVacuumEntryPage(GinVacuumState *gvs, *** 519,525 **** * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ ! roots[*nroot] = GinItemPointerGetBlockNumber(&itup->t_tid); (*nroot)++; } else if (GinGetNPosting(itup) > 0) --- 518,524 ---- * store posting tree's roots for further processing, we can't * vacuum it just now due to risk of deadlocks with scans/inserts */ ! roots[*nroot] = GinGetDownlink(itup); (*nroot)++; } else if (GinGetNPosting(itup) > 0) *************** ginVacuumEntryPage(GinVacuumState *gvs, *** 533,540 **** if (GinGetNPosting(itup) != newN) { - Datum value; OffsetNumber attnum; /* * Some ItemPointers was deleted, so we should remake our --- 532,540 ---- if (GinGetNPosting(itup) != newN) { OffsetNumber attnum; + Datum key; + GinNullCategory category; /* * Some ItemPointers was deleted, so we should remake our *************** ginVacuumEntryPage(GinVacuumState *gvs, *** 562,570 **** itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } - value = gin_index_getattr(&gvs->ginstate, itup); attnum = gintuple_get_attrnum(&gvs->ginstate, itup); ! itup = GinFormTuple(gvs->index, &gvs->ginstate, attnum, value, GinGetPosting(itup), newN, true); PageIndexTupleDelete(tmppage, i); --- 562,570 ---- itup = (IndexTuple) PageGetItem(tmppage, PageGetItemId(tmppage, i)); } attnum = gintuple_get_attrnum(&gvs->ginstate, itup); ! key = gintuple_get_key(&gvs->ginstate, itup, &category); ! itup = GinFormTuple(&gvs->ginstate, attnum, key, category, GinGetPosting(itup), newN, true); PageIndexTupleDelete(tmppage, i); *************** ginbulkdelete(PG_FUNCTION_ARGS) *** 606,612 **** /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* and cleanup any pending inserts */ ! ginInsertCleanup(index, &gvs.ginstate, true, stats); } /* we'll re-count the tuples each time */ --- 606,612 ---- /* Yes, so initialize stats to zeroes */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); /* and cleanup any pending inserts */ ! ginInsertCleanup(&gvs.ginstate, true, stats); } /* we'll re-count the tuples each time */ *************** ginbulkdelete(PG_FUNCTION_ARGS) *** 642,648 **** Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); ! blkno = GinItemPointerGetBlockNumber(&(itup)->t_tid); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); --- 642,648 ---- Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); ! blkno = GinGetDownlink(itup); Assert(blkno != InvalidBlockNumber); UnlockReleaseBuffer(buffer); *************** ginvacuumcleanup(PG_FUNCTION_ARGS) *** 719,725 **** if (IsAutoVacuumWorkerProcess()) { initGinState(&ginstate, index); ! ginInsertCleanup(index, &ginstate, true, stats); } PG_RETURN_POINTER(stats); } --- 719,725 ---- if (IsAutoVacuumWorkerProcess()) { initGinState(&ginstate, index); ! ginInsertCleanup(&ginstate, true, stats); } PG_RETURN_POINTER(stats); } *************** ginvacuumcleanup(PG_FUNCTION_ARGS) *** 732,738 **** { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); initGinState(&ginstate, index); ! ginInsertCleanup(index, &ginstate, true, stats); } memset(&idxStat, 0, sizeof(idxStat)); --- 732,738 ---- { stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); initGinState(&ginstate, index); ! ginInsertCleanup(&ginstate, true, stats); } memset(&idxStat, 0, sizeof(idxStat)); diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 36f0233baade3bec23816a1189e7cf9134877302..f824cdb0d65d18368270f3219ec56607f165836f 100644 *** a/src/backend/access/gin/ginxlog.c --- b/src/backend/access/gin/ginxlog.c *************** ginRedoInsert(XLogRecPtr lsn, XLogRecord *** 152,158 **** itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert)); forgetIncompleteSplit(data->node, ! GinItemPointerGetBlockNumber(&itup->t_tid), data->updateBlkno); } } --- 152,158 ---- itup = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsert)); forgetIncompleteSplit(data->node, ! GinGetDownlink(itup), data->updateBlkno); } } *************** ginRedoInsert(XLogRecPtr lsn, XLogRecord *** 213,219 **** Assert(!GinPageIsLeaf(page)); Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page)); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset)); ! ItemPointerSet(&itup->t_tid, data->updateBlkno, InvalidOffsetNumber); } if (data->isDelete) --- 213,219 ---- Assert(!GinPageIsLeaf(page)); Assert(data->offset >= FirstOffsetNumber && data->offset <= PageGetMaxOffsetNumber(page)); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, data->offset)); ! GinSetDownlink(itup, data->updateBlkno); } if (data->isDelete) *************** ginRedoSplit(XLogRecPtr lsn, XLogRecord *** 270,276 **** if (data->isData) { char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit); ! Size sizeofitem = GinSizeOfItem(lpage); OffsetNumber i; ItemPointer bound; --- 270,276 ---- if (data->isData) { char *ptr = XLogRecGetData(record) + sizeof(ginxlogSplit); ! Size sizeofitem = GinSizeOfDataPageItem(lpage); OffsetNumber i; ItemPointer bound; *************** ginRedoVacuumPage(XLogRecPtr lsn, XLogRe *** 380,387 **** { if (GinPageIsData(page)) { ! memcpy(GinDataPageGetData(page), XLogRecGetData(record) + sizeof(ginxlogVacuumPage), ! GinSizeOfItem(page) *data->nitem); GinPageGetOpaque(page)->maxoff = data->nitem; } else --- 380,388 ---- { if (GinPageIsData(page)) { ! memcpy(GinDataPageGetData(page), ! XLogRecGetData(record) + sizeof(ginxlogVacuumPage), ! data->nitem * GinSizeOfDataPageItem(page)); GinPageGetOpaque(page)->maxoff = data->nitem; } else *************** static void *** 792,797 **** --- 793,799 ---- ginContinueSplit(ginIncompleteSplit *split) { GinBtreeData btree; + GinState ginstate; Relation reln; Buffer buffer; GinBtreeStack stack; *************** ginContinueSplit(ginIncompleteSplit *spl *** 813,819 **** if (split->rootBlkno == GIN_ROOT_BLKNO) { ! ginPrepareEntryScan(&btree, reln, InvalidOffsetNumber, (Datum) 0, NULL); btree.entry = ginPageGetLinkItup(buffer); } else --- 815,826 ---- if (split->rootBlkno == GIN_ROOT_BLKNO) { ! MemSet(&ginstate, 0, sizeof(ginstate)); ! ginstate.index = reln; ! ! ginPrepareEntryScan(&btree, ! InvalidOffsetNumber, (Datum) 0, GIN_CAT_NULL_KEY, ! &ginstate); btree.entry = ginPageGetLinkItup(buffer); } else diff --git a/src/include/access/gin.h b/src/include/access/gin.h index d07454af6850e07df3596746e6e6499ff8ee60f5..286c943e563e8ba5282a770ce4f3f7c901649a6e 100644 *** a/src/include/access/gin.h --- b/src/include/access/gin.h *************** *** 13,20 **** #include "access/genam.h" #include "access/itup.h" #include "access/xlog.h" - #include "utils/rbtree.h" #include "fmgr.h" /* --- 13,20 ---- #include "access/genam.h" #include "access/itup.h" #include "access/xlog.h" #include "fmgr.h" + #include "utils/rbtree.h" /* *************** *** 37,45 **** typedef struct GinPageOpaqueData { BlockNumber rightlink; /* next page if any */ ! OffsetNumber maxoff; /* number entries on GIN_DATA page; number of ! * heap ItemPointer on GIN_DATA|GIN_LEAF page ! * and number of records on GIN_DATA & * ~GIN_LEAF page. On GIN_LIST page, number of * heap tuples. */ uint16 flags; /* see bit definitions below */ --- 37,45 ---- typedef struct GinPageOpaqueData { BlockNumber rightlink; /* next page if any */ ! OffsetNumber maxoff; /* number entries on GIN_DATA page: number of ! * heap ItemPointers on GIN_DATA|GIN_LEAF page ! * or number of PostingItems on GIN_DATA & * ~GIN_LEAF page. On GIN_LIST page, number of * heap tuples. */ uint16 flags; /* see bit definitions below */ *************** typedef struct GinMetaPageData *** 87,99 **** BlockNumber nEntryPages; BlockNumber nDataPages; int64 nEntries; } GinMetaPageData; #define GinPageGetMeta(p) \ ((GinMetaPageData *) PageGetContents(p)) /* ! * Works on page */ #define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) ) --- 87,113 ---- BlockNumber nEntryPages; BlockNumber nDataPages; int64 nEntries; + + /* + * GIN version number (ideally this should have been at the front, but + * too late now. Don't move it!) + * + * Currently 1 (for indexes initialized in 9.1 or later) + * + * Version 0 (indexes initialized in 9.0 or before) is compatible but may + * be missing null entries, including both null keys and placeholders. + * Reject full-index-scan attempts on such indexes. + */ + int32 ginVersion; } GinMetaPageData; + #define GIN_CURRENT_VERSION 1 + #define GinPageGetMeta(p) \ ((GinMetaPageData *) PageGetContents(p)) /* ! * Macros for accessing a GIN index page's opaque data */ #define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) ) *************** typedef struct GinMetaPageData *** 149,158 **** (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \ GinItemPointerGetBlockNumber(p) != InvalidBlockNumber) typedef struct { ! BlockIdData child_blkno; /* use it instead of BlockNumber to save space ! * on page */ ItemPointerData key; } PostingItem; --- 163,175 ---- (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \ GinItemPointerGetBlockNumber(p) != InvalidBlockNumber) + /* + * Posting item in a non-leaf posting-tree page + */ typedef struct { ! /* We use BlockIdData not BlockNumber to avoid padding space wastage */ ! BlockIdData child_blkno; ItemPointerData key; } PostingItem; *************** typedef struct *** 163,185 **** BlockIdSet(&((pointer)->child_blkno), (blockNumber)) /* ! * Support work on IndexTuple on leaf pages */ #define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid) ! #define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,(n)) #define GIN_TREE_POSTING ((OffsetNumber)0xffff) ! #define GinIsPostingTree(itup) ( GinGetNPosting(itup)==GIN_TREE_POSTING ) ! #define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING ), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) ) #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) ! #define GinGetOrigSizePosting(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) ! #define GinSetOrigSizePosting(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)) ! #define GinGetPosting(itup) ( (ItemPointer)(( ((char*)(itup)) + SHORTALIGN(GinGetOrigSizePosting(itup)) )) ) #define GinMaxItemSize \ MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \ MAXALIGN(sizeof(GinPageOpaqueData))) / 3 - sizeof(ItemIdData))) /* * Data (posting tree) pages --- 180,235 ---- BlockIdSet(&((pointer)->child_blkno), (blockNumber)) /* ! * Category codes to distinguish placeholder nulls from ordinary NULL keys. ! * Note that the datatype size and the first two code values are chosen to be ! * compatible with the usual usage of bool isNull flags. ! * ! * GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is ! * chosen to sort before not after regular key values. ! */ ! typedef signed char GinNullCategory; ! ! #define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */ ! #define GIN_CAT_NULL_KEY 1 /* null key value */ ! #define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */ ! #define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */ ! #define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */ ! ! /* ! * Access macros for null category byte in entry tuples ! */ ! #define GinCategoryOffset(itup,ginstate) \ ! (IndexInfoFindDataOffset((itup)->t_info) + \ ! ((ginstate)->oneCol ? 0 : sizeof(int2))) ! #define GinGetNullCategory(itup,ginstate) \ ! (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate)))) ! #define GinSetNullCategory(itup,ginstate,c) \ ! (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c)) ! ! /* ! * Access macros for leaf-page entry tuples (see discussion in README) */ #define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid) ! #define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n) #define GIN_TREE_POSTING ((OffsetNumber)0xffff) ! #define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING) ! #define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) ) #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) ! #define GinGetPostingOffset(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) ! #define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n) ! #define GinGetPosting(itup) ((ItemPointer) ((char*)(itup) + GinGetPostingOffset(itup))) #define GinMaxItemSize \ MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \ MAXALIGN(sizeof(GinPageOpaqueData))) / 3 - sizeof(ItemIdData))) + /* + * Access macros for non-leaf entry tuples + */ + #define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) + #define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber) + /* * Data (posting tree) pages *************** typedef struct *** 187,203 **** #define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page)) #define GinDataPageGetData(page) \ (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))) ! #define GinSizeOfItem(page) \ (GinPageIsLeaf(page) ? sizeof(ItemPointerData) : sizeof(PostingItem)) #define GinDataPageGetItem(page,i) \ ! (GinDataPageGetData(page) + ((i)-1) * GinSizeOfItem(page)) #define GinDataPageGetFreeSpace(page) \ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ - MAXALIGN(sizeof(ItemPointerData)) \ ! - GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \ - MAXALIGN(sizeof(GinPageOpaqueData))) /* * List pages */ --- 237,259 ---- #define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page)) #define GinDataPageGetData(page) \ (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))) ! #define GinSizeOfDataPageItem(page) \ (GinPageIsLeaf(page) ? sizeof(ItemPointerData) : sizeof(PostingItem)) #define GinDataPageGetItem(page,i) \ ! (GinDataPageGetData(page) + ((i)-1) * GinSizeOfDataPageItem(page)) #define GinDataPageGetFreeSpace(page) \ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ - MAXALIGN(sizeof(ItemPointerData)) \ ! - GinPageGetOpaque(page)->maxoff * GinSizeOfDataPageItem(page) \ - MAXALIGN(sizeof(GinPageOpaqueData))) + #define GinMaxLeafDataItems \ + ((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \ + MAXALIGN(sizeof(ItemPointerData)) - \ + MAXALIGN(sizeof(GinPageOpaqueData))) \ + / sizeof(ItemPointerData)) + /* * List pages */ *************** typedef struct GinOptions *** 219,243 **** ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE) #define GIN_UNLOCK BUFFER_LOCK_UNLOCK #define GIN_SHARE BUFFER_LOCK_SHARE #define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE typedef struct GinState { FmgrInfo compareFn[INDEX_MAX_KEYS]; FmgrInfo extractValueFn[INDEX_MAX_KEYS]; FmgrInfo extractQueryFn[INDEX_MAX_KEYS]; FmgrInfo consistentFn[INDEX_MAX_KEYS]; FmgrInfo comparePartialFn[INDEX_MAX_KEYS]; /* optional method */ ! ! bool canPartialMatch[INDEX_MAX_KEYS]; /* can opclass perform ! * partial match (prefix ! * search)? */ ! ! TupleDesc tupdesc[INDEX_MAX_KEYS]; ! TupleDesc origTupdesc; ! bool oneCol; } GinState; /* XLog stuff */ --- 275,318 ---- ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE) + /* Macros for buffer lock/unlock operations */ #define GIN_UNLOCK BUFFER_LOCK_UNLOCK #define GIN_SHARE BUFFER_LOCK_SHARE #define GIN_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE + + /* + * GinState: working data structure describing the index being worked on + */ typedef struct GinState { + Relation index; + bool oneCol; /* true if single-column index */ + + /* + * origTupDesc is the nominal tuple descriptor of the index, ie, the i'th + * attribute shows the key type (not the input data type!) of the i'th + * index column. In a single-column index this describes the actual leaf + * index tuples. In a multi-column index, the actual leaf tuples contain + * a smallint column number followed by a key datum of the appropriate + * type for that column. We set up tupdesc[i] to describe the actual + * rowtype of the index tuples for the i'th column, ie, (int2, keytype). + * Note that in any case, leaf tuples contain more data than is known to + * the TupleDesc; see access/gin/README for details. + */ + TupleDesc origTupdesc; + TupleDesc tupdesc[INDEX_MAX_KEYS]; + + /* + * Per-index-column opclass support functions + */ FmgrInfo compareFn[INDEX_MAX_KEYS]; FmgrInfo extractValueFn[INDEX_MAX_KEYS]; FmgrInfo extractQueryFn[INDEX_MAX_KEYS]; FmgrInfo consistentFn[INDEX_MAX_KEYS]; FmgrInfo comparePartialFn[INDEX_MAX_KEYS]; /* optional method */ ! /* canPartialMatch[i] is true if comparePartialFn[i] is valid */ ! bool canPartialMatch[INDEX_MAX_KEYS]; } GinState; /* XLog stuff */ *************** extern Buffer GinNewBuffer(Relation inde *** 362,376 **** extern void GinInitBuffer(Buffer b, uint32 f); extern void GinInitPage(Page page, uint32 f, Size pageSize); extern void GinInitMetabuffer(Buffer b); ! extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b); ! extern int ginCompareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a, ! OffsetNumber attnum_b, Datum b); ! extern Datum *ginExtractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value, ! int32 *nentries, bool *needUnique); ! extern Datum *ginExtractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum value, int32 *nentries); - extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple); extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple); /* * GinStatsData represents stats data for planner use --- 437,455 ---- extern void GinInitBuffer(Buffer b, uint32 f); extern void GinInitPage(Page page, uint32 f, Size pageSize); extern void GinInitMetabuffer(Buffer b); ! extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, ! Datum a, GinNullCategory categorya, ! Datum b, GinNullCategory categoryb); ! extern int ginCompareAttEntries(GinState *ginstate, ! OffsetNumber attnuma, Datum a, GinNullCategory categorya, ! OffsetNumber attnumb, Datum b, GinNullCategory categoryb); ! extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum, ! Datum value, bool isNull, ! int32 *nentries, GinNullCategory **categories); extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple); + extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple, + GinNullCategory *category); /* * GinStatsData represents stats data for planner use *************** typedef struct GinStatsData *** 382,387 **** --- 461,467 ---- BlockNumber nEntryPages; BlockNumber nDataPages; int64 nEntries; + int32 ginVersion; } GinStatsData; extern void ginGetStats(Relation index, GinStatsData *stats); *************** extern void ginUpdateStats(Relation inde *** 391,398 **** extern Datum ginbuild(PG_FUNCTION_ARGS); extern Datum ginbuildempty(PG_FUNCTION_ARGS); extern Datum gininsert(PG_FUNCTION_ARGS); ! extern void ginEntryInsert(Relation index, GinState *ginstate, ! OffsetNumber attnum, Datum value, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats); --- 471,478 ---- extern Datum ginbuild(PG_FUNCTION_ARGS); extern Datum ginbuildempty(PG_FUNCTION_ARGS); extern Datum gininsert(PG_FUNCTION_ARGS); ! extern void ginEntryInsert(GinState *ginstate, ! OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *items, uint32 nitem, GinStatsData *buildStats); *************** typedef struct GinBtreeStack *** 410,416 **** BlockNumber blkno; Buffer buffer; OffsetNumber off; ! /* predictNumber contains prediction number of pages on current level */ uint32 predictNumber; struct GinBtreeStack *parent; } GinBtreeStack; --- 490,496 ---- BlockNumber blkno; Buffer buffer; OffsetNumber off; ! /* predictNumber contains predicted number of pages on current level */ uint32 predictNumber; struct GinBtreeStack *parent; } GinBtreeStack; *************** typedef struct GinBtreeData *** 436,442 **** bool searchMode; Relation index; ! GinState *ginstate; bool fullScan; bool isBuild; --- 516,522 ---- bool searchMode; Relation index; ! GinState *ginstate; /* not valid in a data scan */ bool fullScan; bool isBuild; *************** typedef struct GinBtreeData *** 444,454 **** /* Entry options */ OffsetNumber entryAttnum; ! Datum entryValue; IndexTuple entry; bool isDelete; ! /* Data (posting tree) option */ ItemPointerData *items; uint32 nitem; uint32 curitem; --- 524,535 ---- /* Entry options */ OffsetNumber entryAttnum; ! Datum entryKey; ! GinNullCategory entryCategory; IndexTuple entry; bool isDelete; ! /* Data (posting tree) options */ ItemPointerData *items; uint32 nitem; uint32 curitem; *************** extern void ginInsertValue(GinBtree btre *** 464,475 **** extern void ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno); /* ginentrypage.c */ ! extern IndexTuple GinFormTuple(Relation index, GinState *ginstate, ! OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd, bool errorTooBig); extern void GinShortenTuple(IndexTuple itup, uint32 nipd); ! extern void ginPrepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, ! Datum value, GinState *ginstate); extern void ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern IndexTuple ginPageGetLinkItup(Buffer buf); --- 545,557 ---- extern void ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno); /* ginentrypage.c */ ! extern IndexTuple GinFormTuple(GinState *ginstate, ! OffsetNumber attnum, Datum key, GinNullCategory category, ItemPointerData *ipd, uint32 nipd, bool errorTooBig); extern void GinShortenTuple(IndexTuple itup, uint32 nipd); ! extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, ! Datum key, GinNullCategory category, ! GinState *ginstate); extern void ginEntryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern IndexTuple ginPageGetLinkItup(Buffer buf); *************** typedef struct *** 490,520 **** extern GinPostingTreeScan *ginPrepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode); ! extern void ginInsertItemPointer(GinPostingTreeScan *gdi, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats); extern Buffer ginScanBeginPostingTree(GinPostingTreeScan *gdi); extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void ginPrepareDataScan(GinBtree btree, Relation index); /* ginscan.c */ typedef struct GinScanEntryData *GinScanEntry; typedef struct GinScanEntryData { ! /* link to the equals entry in current scan key */ GinScanEntry master; ! /* ! * link to values reported to consistentFn, points to ! * GinScanKey->entryRes[i] ! */ bool *pval; ! /* entry, got from extractQueryFn */ ! Datum entry; ! OffsetNumber attnum; Pointer extra_data; /* Current page in posting tree */ --- 572,637 ---- extern GinPostingTreeScan *ginPrepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode); ! extern void ginInsertItemPointers(GinPostingTreeScan *gdi, ! ItemPointerData *items, uint32 nitem, ! GinStatsData *buildStats); extern Buffer ginScanBeginPostingTree(GinPostingTreeScan *gdi); extern void ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); extern void ginPrepareDataScan(GinBtree btree, Relation index); /* ginscan.c */ + /* + * GinScanKeyData describes a single GIN index qualification condition. + * It contains one GinScanEntryData for each key datum extracted from + * the qual by the extractQueryFn. + */ + typedef struct GinScanKeyData *GinScanKey; + typedef struct GinScanEntryData *GinScanEntry; + typedef struct GinScanKeyData + { + /* Number of entries in query (gotten from extractQueryFn) */ + uint32 nentries; + + /* array of GinScanEntryData, one per entry */ + GinScanEntry scanEntry; + + /* array of ItemPointer result, reported to consistentFn */ + bool *entryRes; + + /* other data needed for calling consistentFn */ + Datum query; + Datum *queryValues; + GinNullCategory *queryCategories; + Pointer *extra_data; + StrategyNumber strategy; + OffsetNumber attnum; + + /* scan status data */ + ItemPointerData curItem; + bool recheckCurItem; + + bool firstCall; + bool isFinished; + } GinScanKeyData; + typedef struct GinScanEntryData { ! /* back-link to the parent GinScanKeyData */ ! GinScanKey parent; ! ! /* link to any preceding identical entry in current scan key */ GinScanEntry master; ! /* ptr to value reported to consistentFn, points to parent->entryRes[i] */ bool *pval; ! /* query key and other information from extractQueryFn */ ! Datum queryKey; ! GinNullCategory queryCategory; ! bool isPartialMatch; Pointer extra_data; /* Current page in posting tree */ *************** typedef struct GinScanEntryData *** 523,534 **** /* current ItemPointer to heap */ ItemPointerData curItem; ! /* partial match support */ ! bool isPartialMatch; ! TIDBitmap *partialMatch; ! TBMIterator *partialMatchIterator; ! TBMIterateResult *partialMatchResult; ! StrategyNumber strategy; /* used for Posting list and one page in Posting tree */ ItemPointerData *list; --- 640,649 ---- /* current ItemPointer to heap */ ItemPointerData curItem; ! /* for a partial-match or full-scan query, we accumulate all TIDs here */ ! TIDBitmap *matchBitmap; ! TBMIterator *matchIterator; ! TBMIterateResult *matchResult; /* used for Posting list and one page in Posting tree */ ItemPointerData *list; *************** typedef struct GinScanEntryData *** 540,571 **** uint32 predictNumberResult; } GinScanEntryData; - typedef struct GinScanKeyData - { - /* Number of entries in query (got by extractQueryFn) */ - uint32 nentries; - - /* array of ItemPointer result, reported to consistentFn */ - bool *entryRes; - - /* array of scans per entry */ - GinScanEntry scanEntry; - Pointer *extra_data; - - /* for calling consistentFn(GinScanKey->entryRes, strategy, query) */ - StrategyNumber strategy; - Datum query; - OffsetNumber attnum; - - ItemPointerData curItem; - bool recheckCurItem; - - bool firstCall; - bool isFinished; - } GinScanKeyData; - - typedef GinScanKeyData *GinScanKey; - typedef struct GinScanOpaqueData { MemoryContext tempCtx; --- 655,660 ---- *************** extern Datum ginqueryarrayextract(PG_FUN *** 601,632 **** extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); /* ginbulk.c */ ! typedef struct EntryAccumulator { RBNode rbnode; ! Datum value; ! uint32 length; ! uint32 number; OffsetNumber attnum; bool shouldSort; ItemPointerData *list; ! } EntryAccumulator; typedef struct { GinState *ginstate; long allocatedMemory; ! uint32 length; ! EntryAccumulator *entryallocator; RBTree *tree; } BuildAccumulator; extern void ginInitBA(BuildAccumulator *accum); ! extern void ginInsertRecordBA(BuildAccumulator *accum, ! ItemPointer heapptr, ! OffsetNumber attnum, Datum *entries, int32 nentry); extern void ginBeginBAScan(BuildAccumulator *accum); ! extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n); /* ginfast.c */ --- 690,725 ---- extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); /* ginbulk.c */ ! typedef struct GinEntryAccumulator { RBNode rbnode; ! Datum key; ! GinNullCategory category; OffsetNumber attnum; bool shouldSort; ItemPointerData *list; ! uint32 maxcount; /* allocated size of list[] */ ! uint32 count; /* current number of list[] entries */ ! } GinEntryAccumulator; typedef struct { GinState *ginstate; long allocatedMemory; ! GinEntryAccumulator *entryallocator; ! uint32 eas_used; RBTree *tree; } BuildAccumulator; extern void ginInitBA(BuildAccumulator *accum); ! extern void ginInsertBAEntries(BuildAccumulator *accum, ! ItemPointer heapptr, OffsetNumber attnum, ! Datum *entries, GinNullCategory *categories, ! int32 nentries); extern void ginBeginBAScan(BuildAccumulator *accum); ! extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum, ! OffsetNumber *attnum, Datum *key, GinNullCategory *category, ! uint32 *n); /* ginfast.c */ *************** typedef struct GinTupleCollector *** 638,649 **** uint32 sumsize; } GinTupleCollector; ! extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate, GinTupleCollector *collector); ! extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate, GinTupleCollector *collector, ! OffsetNumber attnum, Datum value, ItemPointer item); ! extern void ginInsertCleanup(Relation index, GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats); #endif /* GIN_H */ --- 731,743 ---- uint32 sumsize; } GinTupleCollector; ! extern void ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector); ! extern void ginHeapTupleFastCollect(GinState *ginstate, GinTupleCollector *collector, ! OffsetNumber attnum, Datum value, bool isNull, ! ItemPointer ht_ctid); ! extern void ginInsertCleanup(GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats); #endif /* GIN_H */