diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 1b45a4c..9f899c7 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -92,6 +92,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = brinbuild; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index f07eedc..1bc91d2 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -49,6 +49,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = ginbuild; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index b8aa9bc..4ec34d5 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -69,6 +69,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = true; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = gistbuild; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 1fa087a..a2cf278 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -66,6 +66,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = INT4OID; amroutine->ambuild = hashbuild; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1dbefda..71d5fbc 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3411,6 +3411,8 @@ simple_heap_delete(Relation relation, ItemPointer tid) * crosscheck - if not 
InvalidSnapshot, also check old tuple against this * wait - true if should wait for any conflicting update to commit/abort * hufd - output parameter, filled in failure cases (see below) + * unchanged_ind_cols - output parameter; bits set for unmodified columns + * that are indexed by indirect indexes * lockmode - output parameter, filled with lock mode acquired on tuple * * Normal, successful return value is HeapTupleMayBeUpdated, which @@ -3433,13 +3435,15 @@ simple_heap_delete(Relation relation, ItemPointer tid) HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd, LockTupleMode *lockmode) + HeapUpdateFailureData *hufd, Bitmapset **unchanged_ind_cols, + LockTupleMode *lockmode) { HTSU_Result result; TransactionId xid = GetCurrentTransactionId(); Bitmapset *hot_attrs; Bitmapset *key_attrs; Bitmapset *id_attrs; + Bitmapset *indirect_attrs; Bitmapset *interesting_attrs; Bitmapset *modified_attrs; ItemId lp; @@ -3496,14 +3500,16 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, * Note that we get a copy here, so we need not worry about relcache flush * happening midway through. 
*/ - hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_ALL); + hot_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_HOT); key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY); id_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_IDENTITY_KEY); + indirect_attrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_INDIRECT_INDEXES); interesting_attrs = bms_add_members(NULL, hot_attrs); interesting_attrs = bms_add_members(interesting_attrs, key_attrs); interesting_attrs = bms_add_members(interesting_attrs, id_attrs); - + interesting_attrs = bms_add_members(interesting_attrs, indirect_attrs); block = ItemPointerGetBlockNumber(otid); buffer = ReadBuffer(relation, block); @@ -3553,9 +3559,12 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, /* * Determine which columns are being modified by the update. */ - modified_attrs = HeapDetermineModifiedColumns(relation, interesting_attrs, &oldtup, newtup); + if (unchanged_ind_cols) + *unchanged_ind_cols = bms_union(key_attrs, + bms_int_members(indirect_attrs, + modified_attrs)); /* * If we're not updating any "key" column, we can grab a weaker lock type. @@ -3807,6 +3816,7 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(indirect_attrs); bms_free(modified_attrs); bms_free(interesting_attrs); return result; @@ -4278,6 +4288,7 @@ l2: bms_free(hot_attrs); bms_free(key_attrs); bms_free(id_attrs); + bms_free(indirect_attrs); bms_free(modified_attrs); bms_free(interesting_attrs); @@ -4366,18 +4377,24 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, * Check which columns are being updated. * * This simultaneously checks conditions for HOT updates, for FOR KEY - * SHARE updates, and REPLICA IDENTITY concerns. Since much of the time they - * will be checking very similar sets of columns, and doing the same tests on - * them, it makes sense to optimize and do them together. 
+ * SHARE updates, for REPLICA IDENTITY concerns, and for indirect indexing + * concerns. Since much of the time they will be checking very similar sets of + * columns, and doing the same tests on them, it makes sense to optimize and do + * them together. * - * We receive three bitmapsets comprising the three sets of columns we're + * We receive four bitmapsets comprising the four sets of columns we're * interested in. Note these are destructively modified; that is OK since * this is invoked at most once in heap_update. * - * hot_result is set to TRUE if it's okay to do a HOT update (i.e. it does not - * modified indexed columns); key_result is set to TRUE if the update does not - * modify columns used in the key; id_result is set to TRUE if the update does - * not modify columns in any index marked as the REPLICA IDENTITY. + * satisfies_hot is set to TRUE if it's okay to do a HOT update (i.e. it does + * not modify indexed columns); satisfies_key is set to TRUE if the update + * does not modify columns used in the key; satisfies_id is set to TRUE if the + * update does not modify columns in any index marked as the REPLICA IDENTITY. + * + * unchanged_attrs is an output bitmapset that has a bit set if the + * corresponding column is indexed by an indirect index and is not modified. + * Note that because system columns cannot be indexed by indirect indexes, + * these values are not shifted by FirstLowInvalidHeapAttributeNumber. 
*/ static Bitmapset * HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols, @@ -4417,7 +4434,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup) result = heap_update(relation, otid, tup, GetCurrentCommandId(true), InvalidSnapshot, true /* wait for commit */ , - &hufd, &lockmode); + &hufd, NULL, &lockmode); switch (result) { case HeapTupleSelfUpdated: diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index fc4702c..07bf463 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -1614,6 +1614,7 @@ toast_save_datum(Relation rel, Datum value, /* Only index relations marked as ready can be updated */ if (IndexIsReady(toastidxs[i]->rd_index)) index_insert(toastidxs[i], t_values, t_isnull, + NULL, &(toasttup->t_self), toastrel, toastidxs[i]->rd_index->indisunique ? diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 65c941d..ef0c5bc 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -413,7 +413,7 @@ systable_getnext(SysScanDesc sysscan) if (sysscan->irel) { - htup = index_getnext(sysscan->iscan, ForwardScanDirection); + htup = index_getnext(sysscan->iscan, ForwardScanDirection, NULL); /* * We currently don't need to support lossy index operators for any @@ -593,7 +593,7 @@ systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) HeapTuple htup; Assert(sysscan->irel); - htup = index_getnext(sysscan->iscan, direction); + htup = index_getnext(sysscan->iscan, direction, NULL); /* See notes in systable_getnext */ if (htup && sysscan->iscan->xs_recheck) elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 54b71cb..202f64a 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -189,10 +189,13 @@ bool 
index_insert(Relation indexRelation, Datum *values, bool *isnull, + Datum *pkeyValues, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique) { + ItemPointerData iptr; + RELATION_CHECKS; CHECK_REL_PROCEDURE(aminsert); @@ -201,8 +204,19 @@ index_insert(Relation indexRelation, (HeapTuple) NULL, InvalidBuffer); + /* + * Indirect indexes use a fake item pointer constructed from the primary + * key values; regular indexes store the actual heap item pointer. + */ + if (!indexRelation->rd_index->indisindirect) + ItemPointerCopy(heap_t_ctid, &iptr); + else + FAKE_CTID_FROM_PKVALUES(&iptr, + indexRelation->rd_index->indnatts, + pkeyValues); + return indexRelation->rd_amroutine->aminsert(indexRelation, values, isnull, - heap_t_ctid, heapRelation, + &iptr, heapRelation, checkUnique); } @@ -444,6 +458,9 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * although when using an MVCC snapshot it should be impossible for more than * one such tuple to exist.) * + * If anyfound is not NULL, *anyfound is set to TRUE if there are live tuples + * matching the scan keys, even if they are not visible to the scan's snapshot. + * * On success, the buffer containing the heap tup is pinned (the pin will be * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan * call). 
@@ -454,7 +471,7 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) * ---------------- */ HeapTuple -index_fetch_heap(IndexScanDesc scan) +index_fetch_heap(IndexScanDesc scan, bool *anyfound) { ItemPointer tid = &scan->xs_ctup.t_self; bool all_dead = false; @@ -487,6 +504,10 @@ index_fetch_heap(IndexScanDesc scan) !scan->xs_continue_hot); LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + /* Let caller know there are live tuples, even if we can't see them */ + if (!all_dead && anyfound) + *anyfound = true; + if (got_heap_tuple) { /* @@ -524,13 +545,16 @@ index_fetch_heap(IndexScanDesc scan) * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan * call). * + * If anyfound is not NULL, *anyfound is set to TRUE if there are live tuples + * matching the scan keys, even if they are not visible to the scan snapshot. + * * Note: caller must check scan->xs_recheck, and perform rechecking of the * scan keys if required. We do not do that here because we don't have * enough information to do it efficiently in the general case. * ---------------- */ HeapTuple -index_getnext(IndexScanDesc scan, ScanDirection direction) +index_getnext(IndexScanDesc scan, ScanDirection direction, bool *anyfound) { HeapTuple heapTuple; ItemPointer tid; @@ -562,7 +586,7 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) * If we don't find anything, loop around and grab the next TID from * the index. */ - heapTuple = index_fetch_heap(scan); + heapTuple = index_fetch_heap(scan, anyfound); if (heapTuple != NULL) return heapTuple; } @@ -570,6 +594,56 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) return NULL; /* failure exit */ } +/* + * Return the primary key values from the next tuple in a indirect-index scan + * + * pkvals is an output array which must have been allocated by caller. 
+ */ +void +index_getnext_pkey(IndexScanDesc scan, ScanDirection direction, Datum *pkvals, + bool *isDone) +{ + bool found; + Datum pkval; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgettuple); + + /* + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the primary key value into scan->xs_ctup.t_self, from + * which we extract it into the pkvals array. It should also set + * scan->xs_recheck and scan->xs_itup, though we pay no attention to those + * fields here. + */ + found = scan->indexRelation->rd_amroutine->amgettuple(scan, direction); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + + if (!found) + { + if (BufferIsValid(scan->xs_cbuf)) + { + ReleaseBuffer(scan->xs_cbuf); + scan->xs_cbuf = InvalidBuffer; + } + *isDone = true; + return; + } + *isDone = false; + + /* + * XXX this assumes that Datum is 8 bytes. Which it is in my machine .. but + * not elsewhere + */ + /* XXX see FAKE_CTID_FROM_PKVALUES */ + pkval = (((Datum) scan->xs_ctup.t_self.ip_posid) >> 1) | + (((Datum) scan->xs_ctup.t_self.ip_blkid.bi_hi) << 15) | + (((Datum) scan->xs_ctup.t_self.ip_blkid.bi_lo >> 1) << 32); + pkvals[0] = pkval; +} + /* ---------------- * index_getbitmap - get all tuples at once from an index scan * diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index ef69290..eb4beef 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -92,7 +92,9 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); * By here, itup is filled in, including the TID. * * If checkUnique is UNIQUE_CHECK_NO or UNIQUE_CHECK_PARTIAL, this - * will allow duplicates. Otherwise (UNIQUE_CHECK_YES or + * will allow duplicates. If it's UNIQUE_CHECK_INSERT_SINGLETON, the value + * will only be inserted if there isn't already a tuple with that value. 
+ * Otherwise (UNIQUE_CHECK_YES or * UNIQUE_CHECK_EXISTING) it will throw error for a duplicate. * For UNIQUE_CHECK_EXISTING we merely run the duplicate check, and * don't actually insert. @@ -100,8 +102,8 @@ static void _bt_vacuum_one_page(Relation rel, Buffer buffer, Relation heapRel); * The result value is only significant for UNIQUE_CHECK_PARTIAL: * it must be TRUE if the entry is known unique, else FALSE. * (In the current implementation we'll also return TRUE after a - * successful UNIQUE_CHECK_YES or UNIQUE_CHECK_EXISTING call, but - * that's just a coding artifact.) + * successful UNIQUE_CHECK_YES, UNIQUE_CHECK_EXISTING or + * UNIQUE_CHECK_INSERT_SINGLETON call, but that's just a coding artifact.) */ bool _bt_doinsert(Relation rel, IndexTuple itup, @@ -138,6 +140,21 @@ top: true, stack, BT_WRITE, NULL); /* + * In insert-singleton mode, we must return without doing anything if the + * value we're inserting already exists. + */ +#if 0 + if (checkUnique == UNIQUE_CHECK_INSERT_SINGLETON) + { + offset = _bt_binsrch( .. ); + if (offset is valid and contains a tuple matching the scankey) + return true; + /* otherwise fall through to insert */ + } +#endif + + + /* * If we're not allowing duplicates, make sure the key isn't already in * the index. * @@ -158,7 +175,8 @@ top: * let the tuple in and return false for possibly non-unique, or true for * definitely unique. */ - if (checkUnique != UNIQUE_CHECK_NO) + if (checkUnique != UNIQUE_CHECK_NO && + checkUnique != UNIQUE_CHECK_INSERT_SINGLETON) { TransactionId xwait; uint32 speculativeToken; @@ -167,6 +185,10 @@ top: xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey, checkUnique, &is_unique, &speculativeToken); + if (checkUnique == UNIQUE_CHECK_INSERT_SINGLETON && + TransactionIdIsValid(xwait)) + return true; + if (TransactionIdIsValid(xwait)) { /* Have to wait for the other guy ... 
*/ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index a264b92..29c6ae6 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -37,6 +37,7 @@ typedef struct { bool isUnique; bool haveDead; + bool isIndirect; Relation heapRel; BTSpool *spool; @@ -45,6 +46,8 @@ typedef struct * put into spool2 instead of spool in order to avoid uniqueness check. */ BTSpool *spool2; + int16 pkNumKeys; + AttrNumber pkAttnums[INDEX_MAX_KEYS]; double indtuples; } BTBuildState; @@ -98,6 +101,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = true; amroutine->ampredlocks = true; + amroutine->amcanindirect = true; amroutine->amkeytype = InvalidOid; amroutine->ambuild = btbuild; @@ -136,6 +140,25 @@ btbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.heapRel = heap; buildstate.spool = NULL; buildstate.spool2 = NULL; + buildstate.isIndirect = indexInfo->ii_IsIndirect; + if (indexInfo->ii_IsIndirect) + { + Oid pkOid; + Relation pkRel; + int i; + + pkOid = RelationGetPrimaryKey(heap); + pkRel = index_open(pkOid, AccessShareLock); + + buildstate.pkNumKeys = pkRel->rd_index->indnatts; + for (i = 0; i < buildstate.pkNumKeys; i++) + buildstate.pkAttnums[i] = pkRel->rd_index->indkey.values[i]; + index_close(pkRel, AccessShareLock); + } + else + { + buildstate.pkNumKeys = 0; + } buildstate.indtuples = 0; #ifdef BTREE_BUILD_STATS @@ -213,18 +236,42 @@ btbuildCallback(Relation index, void *state) { BTBuildState *buildstate = (BTBuildState *) state; + ItemPointerData iptr; + + if (buildstate->isIndirect) + { + Datum pkValues[INDEX_MAX_KEYS]; + int i; + bool isnull; + + /* + * XXX WAG: this is very slow in the general case, but OK if PK column + * is first. 
+ */ + for (i = 0; i < buildstate->pkNumKeys; i++) + { + pkValues[i] = heap_getattr(htup, + buildstate->pkAttnums[i], + RelationGetDescr(buildstate->heapRel), + &isnull); + Assert(!isnull); + } + FAKE_CTID_FROM_PKVALUES(&iptr, buildstate->pkNumKeys, pkValues); + } + else + ItemPointerCopy(&htup->t_self, &iptr); /* * insert the index tuple into the appropriate spool file for subsequent * processing */ if (tupleIsAlive || buildstate->spool2 == NULL) - _bt_spool(buildstate->spool, &htup->t_self, values, isnull); + _bt_spool(buildstate->spool, &iptr, values, isnull); else { /* dead tuples are put into spool2 */ buildstate->haveDead = true; - _bt_spool(buildstate->spool2, &htup->t_self, values, isnull); + _bt_spool(buildstate->spool2, &iptr, values, isnull); } buildstate->indtuples += 1; diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index d570ae5..9378919 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -48,6 +48,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->amstorage = false; amroutine->amclusterable = false; amroutine->ampredlocks = false; + amroutine->amcanindirect = false; amroutine->amkeytype = InvalidOid; amroutine->ambuild = spgbuild; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 08b0989..4ca563a 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -623,6 +623,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid); values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs); values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique); + values[Anum_pg_index_indisindirect - 1] = BoolGetDatum(indexInfo->ii_IsIndirect); values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary); values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion); values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate); @@ -769,6 +770,26 @@ 
index_create(Relation heapRelation, errmsg("concurrent index creation on system catalog tables is not supported"))); /* + * indirect indexes are forbidden on system catalogs, and they obviously cannot + * be primary keys either. + */ + if (indexInfo->ii_IsIndirect && IsSystemRelation(heapRelation)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("indirect index creation on system catalog tables is not supported"))); + if (indexInfo->ii_IsIndirect && isprimary) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("primary key indexes cannot be indirect"))); + if (indexInfo->ii_IsIndirect && !OidIsValid(RelationGetPrimaryKey(heapRelation))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("table \"%s\" does not have a primary key", + RelationGetRelationName(heapRelation)), + errtable(heapRelation))); + /* XXX other restrictions needed? */ + + /* * This case is currently not supported, but there's no way to ask for it * in the grammar anyway, so it can't happen. 
*/ @@ -1011,6 +1032,14 @@ index_create(Relation heapRelation, Assert(!initdeferred); } + /* Store dependency on primary key index, if needed */ + if (indexInfo->ii_IsIndirect) + { + ObjectAddressSet(referenced, RelationRelationId, + RelationGetPrimaryKey(heapRelation)); + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); + } + /* Store dependency on collations */ /* The default collation is pinned, so don't bother recording it */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) @@ -1681,6 +1710,7 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; + ii->ii_IsIndirect = indexStruct->indisindirect; ii->ii_ReadyForInserts = IndexIsReady(indexStruct); /* assume not doing speculative insertion for now */ ii->ii_UniqueOps = NULL; @@ -3161,6 +3191,7 @@ validate_index_heapscan(Relation heapRelation, index_insert(indexRelation, values, isnull, + NULL, /* FIXME need to pass PK values here */ &rootTuple, heapRelation, indexInfo->ii_Unique ? @@ -3564,7 +3595,9 @@ reindex_relation(Oid relid, int flags, int options) /* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */ if (is_pg_class) + { (void) RelationGetIndexAttrBitmap(rel, INDEX_ATTR_BITMAP_ALL); + } PG_TRY(); { diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index b9fe102..f00f446 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -136,6 +136,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple) index_insert(relationDescs[i], /* index relation */ values, /* array of index Datums */ isnull, /* is-null flags */ + NULL, /* catalogs never have indirect indexes */ &(heapTuple->t_self), /* tid of heap tuple */ heapRelation, relationDescs[i]->rd_index->indisunique ? 
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 2131226..21b63bb 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -946,7 +946,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (indexScan != NULL) { - tuple = index_getnext(indexScan, ForwardScanDirection); + tuple = index_getnext(indexScan, ForwardScanDirection, NULL); if (tuple == NULL) break; @@ -1531,14 +1531,14 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, reindex_relation(OIDOldHeap, reindex_flags, 0); /* - * If the relation being rebuild is pg_class, swap_relation_files() + * If the relation being rebuilt is pg_class, swap_relation_files() * couldn't update pg_class's own pg_class entry (check comments in * swap_relation_files()), thus relfrozenxid was not updated. That's * annoying because a potential reason for doing a VACUUM FULL is a * imminent or actual anti-wraparound shutdown. So, now that we can - * access the new relation using it's indices, update relfrozenxid. + * access the new relation using its indices, update relfrozenxid. * pg_class doesn't have a toast relation, so we don't need to update the - * corresponding toast relation. Not that there's little point moving all + * corresponding toast relation. Note that there's little point moving all * relfrozenxid updates here since swap_relation_files() needs to write to * pg_class for non-mapped relations anyway. */ diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c index 26f9114..5f1a7b7 100644 --- a/src/backend/commands/constraint.c +++ b/src/backend/commands/constraint.c @@ -164,7 +164,7 @@ unique_key_recheck(PG_FUNCTION_ARGS) * correct even if t_self is now dead, because that is the TID the * index will know about. 
*/ - index_insert(indexRel, values, isnull, &(new_row->t_self), + index_insert(indexRel, values, isnull, NULL, &(new_row->t_self), trigdata->tg_relation, UNIQUE_CHECK_EXISTING); } else diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index aa25a23..c5d185a 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2666,9 +2666,10 @@ CopyFrom(CopyState cstate) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), estate, - false, + false, false, NULL, - NIL); + NIL, + NULL); /* AFTER ROW INSERT Triggers */ ExecARInsertTriggers(estate, resultRelInfo, tuple, @@ -2820,7 +2821,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false); recheckIndexes = ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self), - estate, false, NULL, NIL); + estate, false, false, NULL, NIL, NULL); ExecARInsertTriggers(estate, resultRelInfo, bufferedTuples[i], recheckIndexes); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 0a669d9..853f155 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -884,6 +884,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_IndexScan: pname = sname = "Index Scan"; break; + case T_IndirectIndexScan: + pname = sname = "Indirect Index Scan"; + break; case T_IndexOnlyScan: pname = sname = "Index Only Scan"; break; @@ -1099,6 +1102,15 @@ ExplainNode(PlanState *planstate, List *ancestors, ExplainScanTarget((Scan *) indexscan, es); } break; + case T_IndirectIndexScan: + { + IndirectIndexScan *indexscan = (IndirectIndexScan *) plan; + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; case T_IndexOnlyScan: { IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; @@ -1301,6 +1313,19 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed 
by Filter", 1, planstate, es); break; + case T_IndirectIndexScan: + show_scan_qual(((IndirectIndexScan *) plan)->indexqualorig, + "Index Cond", planstate, ancestors, es); + if (((IndirectIndexScan *) plan)->indexqualorig) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndirectIndexScan *) plan)->indexorderbyorig, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; case T_IndexOnlyScan: show_scan_qual(((IndexOnlyScan *) plan)->indexqual, "Index Cond", planstate, ancestors, es); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index eeb2b1f..4a7f79b 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -522,6 +522,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("access method \"%s\" does not support exclusion constraints", accessMethodName))); + if (stmt->isindirect && !amRoutine->amcanindirect) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support indirect indexes", + accessMethodName))); amcanorder = amRoutine->amcanorder; amoptions = amRoutine->amoptions; @@ -557,6 +562,7 @@ DefineIndex(Oid relationId, indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; indexInfo->ii_Unique = stmt->unique; + indexInfo->ii_IsIndirect = stmt->isindirect; /* In a concurrent build, mark it not-ready-for-inserts */ indexInfo->ii_ReadyForInserts = !stmt->concurrent; indexInfo->ii_Concurrent = stmt->concurrent; diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index 51edd4c..799b4b6 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -19,6 +19,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \ nodeBitmapAnd.o nodeBitmapOr.o 
\ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeCustom.o nodeGather.o \ nodeHash.o nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeIndirectIndexscan.o \ nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 2587ef7..bcae700 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -32,6 +32,7 @@ #include "executor/nodeHashjoin.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" +#include "executor/nodeIndirectIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" @@ -170,6 +171,10 @@ ExecReScan(PlanState *node) ExecReScanIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecReScanIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); break; @@ -300,6 +305,10 @@ ExecMarkPos(PlanState *node) ExecIndexMarkPos((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecIndirectIndexMarkPos((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); break; @@ -349,6 +358,9 @@ ExecRestrPos(PlanState *node) ExecIndexRestrPos((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecIndirectIndexRestrPos((IndirectIndexScanState *) node); + case T_IndexOnlyScanState: ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); break; @@ -393,6 +405,7 @@ ExecSupportsMarkRestore(Path *pathnode) switch (pathnode->pathtype) { case T_IndexScan: + case T_IndirectIndexScan: case T_IndexOnlyScan: case T_Material: case T_Sort: @@ -491,6 +504,11 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && 
TargetListSupportsBackwardScan(node->targetlist); + case T_IndirectIndexScan: + /* FIXME this is expected to work, but is untested */ + return IndexSupportsBackwardScan(((IndirectIndexScan *) node)->indexid) && + TargetListSupportsBackwardScan(node->targetlist); + case T_IndexOnlyScan: return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); @@ -537,8 +555,8 @@ TargetListSupportsBackwardScan(List *targetlist) } /* - * An IndexScan or IndexOnlyScan node supports backward scan only if the - * index's AM does. + * An IndexScan, IndirectIndexScan or IndexOnlyScan node supports backward scan + * only if the index's AM does. */ static bool IndexSupportsBackwardScan(Oid indexid) diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 009c1b7..02c7d17 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -206,6 +206,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) if (speculative && ii->ii_Unique) BuildSpeculativeIndexInfo(indexDesc, ii); + if (ii->ii_IsIndirect) + resultRelInfo->ri_hasIndirectIndexes = true; + + /* Remember which of these indexes is the table's primary key */ + if (indexDesc->rd_index->indisprimary) + resultRelInfo->ri_PrimaryKeyIndex = i; + relationDescs[i] = indexDesc; indexInfoArray[i] = ii; i++; @@ -259,21 +266,23 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * the same is done for non-deferred constraints, but report * if conflict was speculative or deferred conflict to caller) * + * If 'isHOTupdate' is TRUE, the new tuple comes from a HOT update; + * only add entries to indirect indexes in that case, and only if + * the indexed values changed. + * * If 'arbiterIndexes' is nonempty, noDupErr applies only to * those indexes. NIL means noDupErr applies to all indexes. - * - * CAUTION: this must not be called for a HOT update. - * We can't defend against that here for lack of info. 
- * Should we change the API to make it safer? * ---------------------------------------------------------------- */ List * ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, EState *estate, + bool isHOTupdate, bool noDupErr, bool *specConflict, - List *arbiterIndexes) + List *arbiterIndexes, + Bitmapset *unchangedAttrs) { List *result = NIL; ResultRelInfo *resultRelInfo; @@ -285,6 +294,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot, ExprContext *econtext; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; + Datum pkeyValues[INDEX_MAX_KEYS]; /* * Get information from the result relation info structure. @@ -324,6 +334,20 @@ ExecInsertIndexTuples(TupleTableSlot *slot, if (!indexInfo->ii_ReadyForInserts) continue; + /* + * If this is the primary key, form the values array if there are any + * indirect indexes. We will need it even if this is a HOT update. + * This coding assumes that the primary key appears before indirect + * indexes in the index list. + */ + if (resultRelInfo->ri_hasIndirectIndexes && + (i == resultRelInfo->ri_PrimaryKeyIndex)) + FormIndexDatum(indexInfo, slot, estate, pkeyValues, isnull); + + /* If the index is not indirect, then HOT updates mustn't insert anything */ + if (isHOTupdate && !indexInfo->ii_IsIndirect) + continue; + /* Check for partial index */ if (indexInfo->ii_Predicate != NIL) { @@ -348,6 +372,34 @@ ExecInsertIndexTuples(TupleTableSlot *slot, } /* + * For indirect indexes, verify whether the indexed attributes have + * changed; if they have not, skip the insertion. + * + * When unchangedAttrs is NULL, we cannot skip the insertion (it only + * becomes set for updates.) + */ + if (indexInfo->ii_IsIndirect && unchangedAttrs) + { + int j; + bool may_skip_insertion = true; + + for (j = 0; j < indexInfo->ii_NumIndexAttrs; j++) + { + /* + * FIXME this assumes indirect indexes are only on columns, not + * expressions. 
+ */ + if (bms_is_member(indexInfo->ii_KeyAttrNumbers[j], unchangedAttrs)) + continue; + may_skip_insertion = false; + } + + /* may skip insertion if no indexed attribute changed value */ + if (may_skip_insertion) + continue; + } + + /* * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. */ @@ -389,6 +441,7 @@ ExecInsertIndexTuples(TupleTableSlot *slot, index_insert(indexRelation, /* index relation */ values, /* array of index Datums */ isnull, /* null flags */ + pkeyValues, /* values of primary key */ tupleid, /* tid of heap tuple */ heapRelation, /* heap relation */ checkUnique); /* type of uniqueness check to do */ @@ -726,7 +779,7 @@ retry: index_rescan(index_scan, scankeys, index_natts, NULL, 0); while ((tup = index_getnext(index_scan, - ForwardScanDirection)) != NULL) + ForwardScanDirection, NULL)) != NULL) { TransactionId xwait; ItemPointerData ctid_wait; diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index bca34a5..370cdc0 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1252,6 +1252,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_FdwRoutine = NULL; resultRelInfo->ri_FdwState = NULL; resultRelInfo->ri_usesFdwDirectModify = false; + resultRelInfo->ri_hasIndirectIndexes = false; resultRelInfo->ri_ConstraintExprs = NULL; resultRelInfo->ri_junkFilter = NULL; resultRelInfo->ri_projectReturning = NULL; diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 554244f..9df9160 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -93,6 +93,7 @@ #include "executor/nodeHashjoin.h" #include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" +#include "executor/nodeIndirectIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" #include "executor/nodeMaterial.h" @@ -203,6 +204,11 @@ 
ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_IndirectIndexScan: + result = (PlanState *) ExecInitIndirectIndexScan((IndirectIndexScan *) node, + estate, eflags); + break; + case T_IndexOnlyScan: result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, estate, eflags); @@ -427,6 +433,10 @@ ExecProcNode(PlanState *node) result = ExecIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + result = ExecIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: result = ExecIndexOnlyScan((IndexOnlyScanState *) node); break; @@ -677,6 +687,10 @@ ExecEndNode(PlanState *node) ExecEndIndexScan((IndexScanState *) node); break; + case T_IndirectIndexScanState: + ExecEndIndirectIndexScan((IndirectIndexScanState *) node); + break; + case T_IndexOnlyScanState: ExecEndIndexOnlyScan((IndexOnlyScanState *) node); break; diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 4f6f91c..c06ff83 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -122,7 +122,7 @@ IndexOnlyNext(IndexOnlyScanState *node) * Rats, we have to visit the heap to check visibility. */ node->ioss_HeapFetches++; - tuple = index_fetch_heap(scandesc); + tuple = index_fetch_heap(scandesc, NULL); if (tuple == NULL) continue; /* no visible tuple, try next index entry */ diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 3143bd9..df115f8 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -102,7 +102,7 @@ IndexNext(IndexScanState *node) /* * ok, now that we have what we need, fetch the next tuple. */ - while ((tuple = index_getnext(scandesc, direction)) != NULL) + while ((tuple = index_getnext(scandesc, direction, NULL)) != NULL) { /* * Store the scanned tuple in the scan tuple slot of the scan state. 
@@ -212,7 +212,7 @@ IndexNextWithReorder(IndexScanState *node) * Fetch next tuple from the index. */ next_indextuple: - tuple = index_getnext(scandesc, ForwardScanDirection); + tuple = index_getnext(scandesc, ForwardScanDirection, NULL); if (!tuple) { /* diff --git a/src/backend/executor/nodeIndirectIndexscan.c b/src/backend/executor/nodeIndirectIndexscan.c new file mode 100644 index 0000000..5df91e8 --- /dev/null +++ b/src/backend/executor/nodeIndirectIndexscan.c @@ -0,0 +1,542 @@ +/*------------------------------------------------------------------------- + * + * nodeIndirectIndexscan.c + * Routines to support indirect index scans + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeIndirectIndexscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecIndirectIndexScan scans an index + * IndirectIndexNext retrieve next tuple + * ExecInitIndirectIndexScan creates and initializes state info. + * ExecReScanIndirectIndexScan rescans the indexed relation. + * ExecEndIndirectIndexScan releases all storage. + * ExecIndirectIndexMarkPos marks scan position. + * ExecIndirectIndexRestrPos restores scan position. + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/visibilitymap.h" +#include "executor/execdebug.h" +#include "executor/nodeIndirectIndexscan.h" +#include "executor/nodeIndexscan.h" +#include "storage/bufmgr.h" +#include "storage/predicate.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +/* ---------------------------------------------------------------- + * IndirectIndexNext + * + * Retrieve a tuple from the IndirectIndexScan node's index. 
+ * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndirectIndexNext(IndirectIndexScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndirectIndexScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + /*---------- + * OK, now that we have what we need, fetch the next tuple. + * + * if we don't have a current indirect tuple: + * obtain tuple from indirect index into slot + * if no tuple is obtained, return "we're done" + * apply rescan on PK, set tuple as scankeys + * obtain tuple from PK + * if no tuple is obtained: + * clear the indirect slot + * start over + * return tuple + *---------- + */ + for (;;) + { + HeapTuple pktuple; + + /* + * If we haven't read a tuple from the indirect index, do so now and + * install it as scan keys into the primary key scan. + */ + if (!node->iiss_HaveIndirectTuple) + { + ScanKeyData pkkeys[INDEX_MAX_KEYS]; + Datum pkvalues[INDEX_MAX_KEYS]; + int nkeys = node->iiss_PKRelationDesc->rd_index->indnatts; + bool isDone; + int i; + + /* + * Obtain one tuple from the indirect index. If there are no more + * tuples in the indirect index, then the scan is done. + */ + index_getnext_pkey(node->iiss_IndScanDesc, direction, pkvalues, &isDone); + if (isDone) + return ExecClearTuple(slot); /* no more tuples */ + + /* + * We now have a tuple, but we don't know if it's worthy of being + * kept around. Initialize flag to have it killed until we know + * better. 
+			 */
+			node->iiss_HaveIndirectTuple = true;
+			node->iiss_IndirectTupleIsLive = false;
+
+			/* Install the primary key scan keys, and get it started */
+			for (i = 0; i < nkeys; i++)
+			{
+				ScanKeyInit(&pkkeys[i],
+							i + 1,
+							BTEqualStrategyNumber,
+							node->iiss_EqualFuncs[i],
+							pkvalues[i]);
+			}
+
+			index_rescan(node->iiss_PKScanDesc, pkkeys, nkeys,
+						 NULL, 0);
+		}
+
+		/*
+		 * By now, we have a valid tuple from the indirect index and need to
+		 * read (more?) tuples from the PK to return.
+		 */
+		pktuple = index_getnext(node->iiss_PKScanDesc, ForwardScanDirection,
+								&node->iiss_IndirectTupleIsLive);
+
+		if (pktuple == NULL)
+		{
+			/*
+			 * If no tuples are obtained from the PK for the current tuple,
+			 * start over making sure that the indirect scan will advance.
+			 */
+			node->iiss_HaveIndirectTuple = false;
+
+			/*
+			 * If the primary key scan says that all tuples are dead, then the
+			 * indirect index tuple doesn't point to anything live anymore, so
+			 * have it removed.
+			 */
+			if (!node->iiss_IndirectTupleIsLive)	/* BUGFIX: was "!&node->...": address-of is never NULL, so the branch was unreachable */
+			{
+				node->iiss_IndScanDesc->kill_prior_tuple = true;
+				elog(WARNING, "killing old tuple");
+			}
+
+			continue;
+		}
+
+		/*
+		 * Store the scanned tuple in the scan tuple slot of the scan state.
+		 * Note: we pass 'false' because tuples returned by amgetnext are
+		 * pointers onto disk pages and must not be pfree()'d.
+		 */
+		ExecStoreTuple(pktuple, slot, node->iiss_PKScanDesc->xs_cbuf, false);
+
+		/*
+		 * Recheck the index quals.  For indirect indexes, this is not
+		 * optional.
+		 */
+		econtext->ecxt_scantuple = slot;
+		ResetExprContext(econtext);
+		if (!ExecQual(node->indexqualorig, econtext, false))
+		{
+			/*
+			 * If the primary key scan says that all tuples are dead, then the
+			 * indirect index tuple doesn't point to anything live anymore, so
+			 * have it removed.
+			 */
+			if (!node->iiss_IndirectTupleIsLive)	/* BUGFIX: was "!&node->...": address-of is never NULL, so the branch was unreachable */
+			{
+				node->iiss_IndScanDesc->kill_prior_tuple = true;
+				elog(WARNING, "killing old tuple");
+			}
+
+			continue;
+		}
+
+		return slot;
+	}
+
+	return ExecClearTuple(slot);
+}
+
+/*
+ * IndirectIndexRecheck -- access method routine to recheck a tuple in
+ * EvalPlanQual
+ */
+static bool
+IndirectIndexRecheck(IndirectIndexScanState *node, TupleTableSlot *slot)
+{
+	elog(ERROR, "EvalPlanQual recheck is not supported in indirect index scans");
+	return false;				/* keep compiler quiet */
+}
+
+TupleTableSlot *
+ExecIndirectIndexScan(IndirectIndexScanState *node)
+{
+	/*
+	 * If we have runtime keys and they've not already been set up, do it now.
+	 */
+	if (node->iiss_NumRuntimeKeys != 0 && !node->iiss_RuntimeKeysReady)
+		ExecReScan((PlanState *) node);
+
+	Assert(node->iiss_NumOrderByKeys == 0);		/* FIXME and test */
+	return ExecScan(&node->ss,
+					(ExecScanAccessMtd) IndirectIndexNext,
+					(ExecScanRecheckMtd) IndirectIndexRecheck);
+}
+
+/* ----------------------------------------------------------------
+ *		ExecReScanIndirectIndexScan(node)
+ *
+ *		Recalculates the values of any scan keys whose value depends on
+ *		information known at runtime, then rescans the indexed relation.
+ *
+ *		Updating the scan key was formerly done separately in
+ *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
+ *		rescans of indices and relations/general streams more uniform.
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanIndirectIndexScan(IndirectIndexScanState *node)
+{
+	/*
+	 * If we are doing runtime key calculations (ie, any of the index key
+	 * values weren't simple Consts), compute the new key values.  But first,
+	 * reset the context so we don't leak memory as each outer tuple is
+	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
+	 * on each call.
+ */ + if (node->iiss_NumRuntimeKeys != 0) + { + ExprContext *econtext = node->iiss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, + node->iiss_RuntimeKeys, + node->iiss_NumRuntimeKeys); + } + node->iiss_RuntimeKeysReady = true; + + /* XXX We don't support reordering of results */ + + /* reset index scan */ + Assert(node->iiss_NumOrderByKeys == 0); /* FIXME and test */ + index_rescan(node->iiss_IndScanDesc, + node->iiss_ScanKeys, node->iiss_NumScanKeys, + node->iiss_OrderByKeys, node->iiss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + +/* ---------------------------------------------------------------- + * ExecEndIndirectIndexScan + * ---------------------------------------------------------------- + */ +void +ExecEndIndirectIndexScan(IndirectIndexScanState *node) +{ + Relation indexRelationDesc; + Relation pkRelationDesc; + IndexScanDesc indexScanDesc; + IndexScanDesc pkScanDesc; + Relation relation; + + /* + * extract information from the node + */ + indexRelationDesc = node->iiss_IndRelationDesc; + pkRelationDesc = node->iiss_PKRelationDesc; + indexScanDesc = node->iiss_IndScanDesc; + pkScanDesc = node->iiss_PKScanDesc; + relation = node->ss.ss_currentRelation; + + /* clear out tuple table slots */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* close the index relations (no-op if we didn't open them) */ + if (indexScanDesc) + index_endscan(indexScanDesc); + if (pkScanDesc) + index_endscan(pkScanDesc); + if (indexRelationDesc) + index_close(indexRelationDesc, NoLock); + if (pkRelationDesc) + index_close(pkRelationDesc, NoLock); + + /* close the heap relation */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecIndirectIndexMarkPos + * ---------------------------------------------------------------- + */ +void +ExecIndirectIndexMarkPos(IndirectIndexScanState *node) +{ + 
index_markpos(node->iiss_IndScanDesc); + node->iiss_HaveIndirectTuple = false; /* necessary? */ +} + +/* ---------------------------------------------------------------- + * ExecIndirectIndexRestrPos + * ---------------------------------------------------------------- + */ +void +ExecIndirectIndexRestrPos(IndirectIndexScanState *node) +{ + index_restrpos(node->iiss_IndScanDesc); + node->iiss_HaveIndirectTuple = false; /* necessary? */ +} + +/* ---------------------------------------------------------------- + * ExecInitIndirectIndexScan + * + * Initializes the index scan's state information, creates + * scan keys, and opens the base and index relations. + * + * Note: indirect index scans have 3 sets of state information + * because we have to keep track of the base relation, the + * indirect index relation itself, and the primary key index + * relation. + * ---------------------------------------------------------------- + */ +IndirectIndexScanState * +ExecInitIndirectIndexScan(IndirectIndexScan *node, EState *estate, int eflags) +{ + IndirectIndexScanState *indexstate; + Relation currentRelation; + bool relistarget; + int i; + + /* + * create state structure + */ + indexstate = makeNode(IndirectIndexScanState); + indexstate->ss.ps.plan = (Plan *) node; + indexstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &indexstate->ss.ps); + + indexstate->ss.ps.ps_TupFromTlist = false; + + /* + * initialize child expressions + */ + indexstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) indexstate); + indexstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) indexstate); + indexstate->indexqualorig = (List *) + ExecInitExpr((Expr *) node->indexqualorig, + (PlanState *) indexstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &indexstate->ss.ps); + 
ExecInitScanTupleSlot(estate, &indexstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); + + indexstate->ss.ss_currentRelation = currentRelation; + indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * get the scan type from the relation descriptor. + */ + ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation)); + + /* + * Initialize result tuple type and projection info. + */ + ExecAssignResultTypeFromTL(&indexstate->ss.ps); + ExecAssignScanProjectionInfo(&indexstate->ss); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return indexstate; + + /* + * Open the index relations. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the indexes, so we can avoid + * taking another lock here. Otherwise we need normal reader's locks. + */ + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + indexstate->iiss_IndRelationDesc = + index_open(node->indexid, relistarget ? NoLock : AccessShareLock); + indexstate->iiss_PKRelationDesc = + index_open(node->pkindexid, relistarget ? 
NoLock : AccessShareLock); + + /* + * Initialize index-specific scan state + */ + indexstate->iiss_HaveIndirectTuple = false; + indexstate->iiss_IndirectTupleIsLive = false; + indexstate->iiss_RuntimeKeysReady = false; + indexstate->iiss_RuntimeKeys = NULL; + indexstate->iiss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iiss_IndRelationDesc, + node->indexqual, + false, + &indexstate->iiss_ScanKeys, + &indexstate->iiss_NumScanKeys, + &indexstate->iiss_RuntimeKeys, + &indexstate->iiss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + Assert(indexstate->iiss_NumOrderByKeys == 0); /* FIXME and test */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->iiss_IndRelationDesc, + node->indexorderby, + true, + &indexstate->iiss_OrderByKeys, + &indexstate->iiss_NumOrderByKeys, + &indexstate->iiss_RuntimeKeys, + &indexstate->iiss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * For the PK scan, initialize the equality func OIDs of each column's + * datatype. 
+ */ + indexstate->iiss_EqualFuncs = palloc(sizeof(Oid) * + indexstate->iiss_PKRelationDesc->rd_index->indnatts); + for (i = 0; i < indexstate->iiss_PKRelationDesc->rd_index->indnatts; i++) + { + AttrNumber attnum; + Oid opfamily; + Oid datatype; + Oid eq_op; + Oid eq_proc; + + attnum = indexstate->iiss_PKRelationDesc->rd_index->indkey.values[i]; + datatype = currentRelation->rd_att->attrs[attnum - 1]->atttypid; + opfamily = indexstate->iiss_PKRelationDesc->rd_opfamily[i]; + eq_op = get_opfamily_member(opfamily, datatype, datatype, + BTEqualStrategyNumber); + if (!OidIsValid(eq_op)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + BTEqualStrategyNumber, datatype, datatype, opfamily); + eq_proc = get_opcode(eq_op); + if (!OidIsValid(eq_proc)) + elog(ERROR, "missing oprcode for operator %u", eq_op); + indexstate->iiss_EqualFuncs[i] = eq_proc; + } + + /* XXX initialize sort support? See nodeIndexscan.c */ + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... + * -tgl 7/11/00 + */ + if (indexstate->iiss_NumRuntimeKeys != 0) + { + ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &indexstate->ss.ps); + indexstate->iiss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; + indexstate->ss.ps.ps_ExprContext = stdecontext; + } + else + { + indexstate->iiss_RuntimeContext = NULL; + } + + /* + * Initialize scan descriptors. + * + * The snapshot to the indirect index doesn't matter, because we only use + * it to obtain primary key values; the visibility is only checked + * through the primary key index tuple anyway. 
+ */ + indexstate->iiss_IndScanDesc = + index_beginscan(currentRelation, + indexstate->iiss_IndRelationDesc, + SnapshotAny, + indexstate->iiss_NumScanKeys, + indexstate->iiss_NumOrderByKeys); + + indexstate->iiss_PKScanDesc = + index_beginscan(currentRelation, + indexstate->iiss_PKRelationDesc, + estate->es_snapshot, + indexstate->iiss_PKRelationDesc->rd_index->indnatts, + 0); + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (indexstate->iiss_NumRuntimeKeys == 0) + index_rescan(indexstate->iiss_IndScanDesc, + indexstate->iiss_ScanKeys, + indexstate->iiss_NumScanKeys, + indexstate->iiss_OrderByKeys, + indexstate->iiss_NumOrderByKeys); + + /* + * all done. + */ + return indexstate; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0d85b15..49aa270 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -512,8 +512,10 @@ ExecInsert(ModifyTableState *mtstate, /* insert index entries for tuple */ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, true, &specConflict, - arbiterIndexes); + estate, false, true, + &specConflict, + arbiterIndexes, + NULL); /* adjust the tuple's state accordingly */ if (!specConflict) @@ -558,8 +560,9 @@ ExecInsert(ModifyTableState *mtstate, /* insert index entries for tuple */ if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, - arbiterIndexes); + estate, false, false, + NULL, arbiterIndexes, + NULL); } } @@ -969,6 +972,7 @@ ExecUpdate(ItemPointer tupleid, else { LockTupleMode lockmode; + Bitmapset *unchangedAttrs = NULL; /* * Constraints might reference the tableoid column, so initialize @@ -1012,7 +1016,9 @@ lreplace:; estate->es_output_cid, estate->es_crosscheck_snapshot, true /* wait for commit */ , - &hufd, &lockmode); + &hufd, + &unchangedAttrs, + &lockmode); switch (result) { case 
HeapTupleSelfUpdated: @@ -1097,12 +1103,13 @@ lreplace:; * * Note: heap_update returns the tid (location) of the new tuple in * the t_self field. - * - * If it's a HOT update, we mustn't insert new index entries. */ - if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple)) + if (resultRelInfo->ri_NumIndices > 0) recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), - estate, false, NULL, NIL); + estate, + HeapTupleIsHeapOnly(tuple), + false, NULL, NIL, + unchangedAttrs); } if (canSetTag) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 6955298..30942f6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -437,6 +437,34 @@ _copyIndexScan(const IndexScan *from) } /* + * _copyIndirectIndexScan + */ +static IndirectIndexScan * +_copyIndirectIndexScan(const IndirectIndexScan *from) +{ + IndirectIndexScan *newnode = makeNode(IndirectIndexScan); + + /* + * copy node superclass fields + */ + CopyScanFields((const Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(indexid); + COPY_SCALAR_FIELD(pkindexid); + COPY_NODE_FIELD(indexqual); + COPY_NODE_FIELD(indexqualorig); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indexorderbyorig); + COPY_NODE_FIELD(indexorderbyops); + COPY_SCALAR_FIELD(indexorderdir); + + return newnode; +} + +/* * _copyIndexOnlyScan */ static IndexOnlyScan * @@ -3158,6 +3186,7 @@ _copyIndexStmt(const IndexStmt *from) COPY_SCALAR_FIELD(indexOid); COPY_SCALAR_FIELD(oldNode); COPY_SCALAR_FIELD(unique); + COPY_SCALAR_FIELD(isindirect); COPY_SCALAR_FIELD(primary); COPY_SCALAR_FIELD(isconstraint); COPY_SCALAR_FIELD(deferrable); @@ -4430,6 +4459,9 @@ copyObject(const void *from) case T_IndexScan: retval = _copyIndexScan(from); break; + case T_IndirectIndexScan: + retval = _copyIndirectIndexScan(from); + break; case T_IndexOnlyScan: retval = _copyIndexOnlyScan(from); break; diff --git a/src/backend/nodes/equalfuncs.c 
b/src/backend/nodes/equalfuncs.c index 548a2aa..efcd04c 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1273,6 +1273,7 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) COMPARE_SCALAR_FIELD(indexOid); COMPARE_SCALAR_FIELD(oldNode); COMPARE_SCALAR_FIELD(unique); + COMPARE_SCALAR_FIELD(isindirect); COMPARE_SCALAR_FIELD(primary); COMPARE_SCALAR_FIELD(isconstraint); COMPARE_SCALAR_FIELD(deferrable); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9fe9873..7c39e13 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -489,6 +489,23 @@ _outIndexScan(StringInfo str, const IndexScan *node) } static void +_outIndirectIndexScan(StringInfo str, const IndirectIndexScan *node) +{ + WRITE_NODE_TYPE("INDIRECTINDEXSCAN"); + + _outScanInfo(str, (const Scan *) node); + + WRITE_OID_FIELD(indexid); + WRITE_OID_FIELD(pkindexid); + WRITE_NODE_FIELD(indexqual); + WRITE_NODE_FIELD(indexqualorig); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indexorderbyorig); + WRITE_NODE_FIELD(indexorderbyops); + WRITE_ENUM_FIELD(indexorderdir, ScanDirection); +} + +static void _outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node) { WRITE_NODE_TYPE("INDEXONLYSCAN"); @@ -2451,6 +2468,7 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_OID_FIELD(indexOid); WRITE_OID_FIELD(oldNode); WRITE_BOOL_FIELD(unique); + WRITE_BOOL_FIELD(isindirect); WRITE_BOOL_FIELD(primary); WRITE_BOOL_FIELD(isconstraint); WRITE_BOOL_FIELD(deferrable); @@ -3389,6 +3407,9 @@ outNode(StringInfo str, const void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_IndirectIndexScan: + _outIndirectIndexScan(str, obj); + break; case T_IndexOnlyScan: _outIndexOnlyScan(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 63f6336..29a3100 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1671,6 +1671,28 @@ 
_readIndexScan(void) } /* + * _readIndirectIndexScan + */ +static IndirectIndexScan * +_readIndirectIndexScan(void) +{ + READ_LOCALS(IndirectIndexScan); + + ReadCommonScan(&local_node->scan); + + READ_OID_FIELD(indexid); + READ_OID_FIELD(pkindexid); + READ_NODE_FIELD(indexqual); + READ_NODE_FIELD(indexqualorig); + READ_NODE_FIELD(indexorderby); + READ_NODE_FIELD(indexorderbyorig); + READ_NODE_FIELD(indexorderbyops); + READ_ENUM_FIELD(indexorderdir, ScanDirection); + + READ_DONE(); +} + +/* * _readIndexOnlyScan */ static IndexOnlyScan * @@ -2465,6 +2487,8 @@ parseNodeString(void) return_value = _readSampleScan(); else if (MATCH("INDEXSCAN", 9)) return_value = _readIndexScan(); + else if (MATCH("INDIRECTINDEXSCAN", 17)) + return_value = _readIndirectIndexScan(); else if (MATCH("INDEXONLYSCAN", 13)) return_value = _readIndexOnlyScan(); else if (MATCH("BITMAPINDEXSCAN", 15)) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2952bfb..5ff3ebe 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -776,9 +776,10 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, * and those we mustn't submit to add_path here.) * * Also, pick out the ones that are usable as bitmap scans. For that, we - * must discard indexes that don't support bitmap scans, and we also are - * only interested in paths that have some selectivity; we should discard - * anything that was generated solely for ordering purposes. + * must discard indexes that don't support bitmap scans as well as + * indirect indexes, and we also are only interested in paths that have + * some selectivity; we should discard anything that was generated solely + * for ordering purposes. 
*/ foreach(lc, indexpaths) { @@ -788,6 +789,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, add_path(rel, (Path *) ipath); if (index->amhasgetbitmap && + !index->indirect && (ipath->path.pathkeys == NIL || ipath->indexselectivity < 1.0)) *bitindexpaths = lappend(*bitindexpaths, ipath); @@ -800,6 +802,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, */ if (skip_nonnative_saop) { + Assert(!index->indirect); /* what to do here? */ indexpaths = build_index_paths(root, rel, index, clauses, false, @@ -1119,6 +1122,10 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel, if (!index->amhasgetbitmap) continue; + /* Ignore indirect indexes too */ + if (index->indirect) + continue; + /* * Ignore partial indexes that do not match the query. If a partial * index is marked predOK then we know it's OK. Otherwise, we have to @@ -1798,6 +1805,10 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index) if (!enable_indexonlyscan) return false; + /* Can't use indirect indexes for index-only scans */ + if (index->indirect) + return false; + /* * Check that all needed attributes of the relation are available from the * index. 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ad49674..52ec437 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -166,6 +166,17 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, List *indexorderby, List *indexorderbyorig, List *indexorderbyops, ScanDirection indexscandir); +static IndirectIndexScan *make_indirectindexscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + Oid pkindexid, + List *indexqual, + List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir); static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexorderby, @@ -2494,6 +2505,8 @@ create_indexscan_plan(PlannerInfo *root, /* Finally ready to build the plan node */ if (indexonly) + { + Assert(!best_path->indexinfo->indirect); /* maybe someday */ scan_plan = (Scan *) make_indexonlyscan(tlist, qpqual, baserelid, @@ -2502,6 +2515,23 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexorderbys, best_path->indexinfo->indextlist, best_path->indexscandir); + } + else if (best_path->indexinfo->indirect) + { + Oid pkindexoid = best_path->indexinfo->pkindex->indexoid; + + scan_plan = (Scan *) make_indirectindexscan(tlist, + qpqual, + baserelid, + indexoid, + pkindexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + indexorderbyops, + best_path->indexscandir); + } else scan_plan = (Scan *) make_indexscan(tlist, qpqual, @@ -4720,6 +4750,39 @@ make_indexscan(List *qptlist, return node; } +static IndirectIndexScan * +make_indirectindexscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + Oid pkindexid, + List *indexqual, + List *indexqualorig, + List *indexorderby, + List *indexorderbyorig, + List *indexorderbyops, + ScanDirection indexscandir) +{ + 
IndirectIndexScan *node = makeNode(IndirectIndexScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->pkindexid = pkindexid; + node->indexqual = indexqual; + node->indexqualorig = indexqualorig; + node->indexorderby = indexorderby; + node->indexorderbyorig = indexorderbyorig; + node->indexorderbyops = indexorderbyops; + node->indexorderdir = indexscandir; + + return node; +} + static IndexOnlyScan * make_indexonlyscan(List *qptlist, List *qpqual, diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 2fe1c8c..8401c14 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -490,6 +490,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->indexorderbyorig, rtoffset); } break; + case T_IndirectIndexScan: + { + IndirectIndexScan *splan = (IndirectIndexScan *) plan; + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->indexqual = + fix_scan_list(root, splan->indexqual, rtoffset); + splan->indexqualorig = + fix_scan_list(root, splan->indexqualorig, rtoffset); + splan->indexorderby = + fix_scan_list(root, splan->indexorderby, rtoffset); + splan->indexorderbyorig = + fix_scan_list(root, splan->indexorderbyorig, rtoffset); + } + break; case T_IndexOnlyScan: { IndexOnlyScan *splan = (IndexOnlyScan *) plan; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 3171743..aa5190c 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -2327,6 +2327,13 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_IndirectIndexScan: + 
finalize_primnode((Node *) ((IndirectIndexScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndirectIndexScan *) plan)->indexorderby, + &context); + break; + case T_IndexOnlyScan: finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, &context); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 72272d9..fa78e33 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -152,6 +152,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, List *indexoidlist; ListCell *l; LOCKMODE lmode; + IndexOptInfo *pkinfo = NULL; indexoidlist = RelationGetIndexList(relation); @@ -234,6 +235,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } info->relam = indexRelation->rd_rel->relam; + info->indirect = index->indisindirect; + if (info->indirect) + { + Assert(pkinfo != NULL); + info->pkindex = pkinfo; + } /* We copy just the fields we need, not all of rd_amroutine */ amroutine = indexRelation->rd_amroutine; @@ -354,6 +361,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->immediate = index->indimmediate; info->hypothetical = false; + /* remember primary key for indirect indexes */ + if (index->indisprimary) + pkinfo = info; + /* * Estimate the index size. 
If it's not a partial index, we lock * the number-of-tuples estimate to equal the parent table; if it diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 834a009..8d1ef2e 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -417,7 +417,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type overlay_placing substr_from substr_for %type opt_instead -%type opt_unique opt_concurrently opt_verbose opt_full +%type opt_unique opt_indirect opt_concurrently opt_verbose opt_full %type opt_freeze opt_default opt_recheck %type opt_binary opt_oids copy_delimiter @@ -614,7 +614,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P - INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P + INCLUDING INCREMENT INDEX INDEXES INDIRECT + INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -7016,25 +7017,26 @@ defacl_privilege_target: * willing to make TABLESPACE a fully reserved word. 
*****************************************************************************/ -IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name +IndexStmt: CREATE opt_unique opt_indirect INDEX opt_concurrently opt_index_name ON qualified_name access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; - n->concurrent = $4; - n->idxname = $5; - n->relation = $7; - n->accessMethod = $8; - n->indexParams = $10; - n->options = $12; - n->tableSpace = $13; - n->whereClause = $14; + n->concurrent = $5; + n->idxname = $6; + n->relation = $8; + n->accessMethod = $9; + n->indexParams = $11; + n->options = $13; + n->tableSpace = $14; + n->whereClause = $15; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; n->oldNode = InvalidOid; n->primary = false; + n->isindirect = $3; n->isconstraint = false; n->deferrable = false; n->initdeferred = false; @@ -7042,25 +7044,26 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_index_name n->if_not_exists = false; $$ = (Node *)n; } - | CREATE opt_unique INDEX opt_concurrently IF_P NOT EXISTS index_name + | CREATE opt_unique opt_indirect INDEX opt_concurrently IF_P NOT EXISTS index_name ON qualified_name access_method_clause '(' index_params ')' opt_reloptions OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); n->unique = $2; - n->concurrent = $4; - n->idxname = $8; - n->relation = $10; - n->accessMethod = $11; - n->indexParams = $13; - n->options = $15; - n->tableSpace = $16; - n->whereClause = $17; + n->concurrent = $5; + n->idxname = $9; + n->relation = $11; + n->accessMethod = $12; + n->indexParams = $14; + n->options = $16; + n->tableSpace = $17; + n->whereClause = $18; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; n->oldNode = InvalidOid; n->primary = false; + n->isindirect = $3; n->isconstraint = false; n->deferrable = false; n->initdeferred = false; @@ -7075,6 +7078,11 @@ 
opt_unique: | /*EMPTY*/ { $$ = FALSE; } ; +opt_indirect: + INDIRECT { $$ = TRUE; } + | /*EMPTY*/ { $$ = FALSE; } + ; + opt_concurrently: CONCURRENTLY { $$ = TRUE; } | /*EMPTY*/ { $$ = FALSE; } @@ -14106,6 +14114,7 @@ unreserved_keyword: | INCREMENT | INDEX | INDEXES + | INDIRECT | INHERIT | INHERITS | INLINE_P diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 4e2ba19..14b0de7 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1248,7 +1248,8 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, if (!attrsOnly) { if (!isConstraint) - appendStringInfo(&buf, "CREATE %sINDEX %s ON %s USING %s (", + appendStringInfo(&buf, "CREATE %s%sINDEX %s ON %s USING %s (", + idxrec->indisindirect ? "INDIRECT " : "", idxrec->indisunique ? "UNIQUE " : "", quote_identifier(NameStr(idxrelrec->relname)), generate_relation_name(indrelid, NIL), diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 4973396..5df2bd7 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5029,6 +5029,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, if (index->relam != BTREE_AM_OID) continue; + /* Ignore indirect indexes */ + if (index->indirect) + continue; + /* * Ignore partial indexes --- we only want stats that cover the entire * relation. 
@@ -5149,7 +5153,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* Fetch first tuple in sortop's direction */ if ((tup = index_getnext(index_scan, - indexscandir)) != NULL) + indexscandir, NULL)) != NULL) { /* Extract the index column values from the heap tuple */ ExecStoreTuple(tup, slot, InvalidBuffer, false); @@ -5181,7 +5185,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* Fetch first tuple in reverse direction */ if ((tup = index_getnext(index_scan, - -indexscandir)) != NULL) + -indexscandir, NULL)) != NULL) { /* Extract the index column values from the heap tuple */ ExecStoreTuple(tup, slot, InvalidBuffer, false); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2a68359..1e2ff56 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -271,7 +271,7 @@ static TupleDesc GetPgIndexDescriptor(void); static void AttrDefaultFetch(Relation relation); static void CheckConstraintFetch(Relation relation); static int CheckConstraintCmp(const void *a, const void *b); -static List *insert_ordered_oid(List *list, Oid datum); +static List *insert_ordered_oid(List *list, Oid datum, bool must_be_first); static void InitIndexAmRoutine(Relation relation); static void IndexSupportInitialize(oidvector *indclass, RegProcedure *indexSupport, @@ -2335,8 +2335,10 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) list_free_deep(relation->rd_fkeylist); list_free(relation->rd_indexlist); bms_free(relation->rd_indexattr); + bms_free(relation->rd_hotattr); bms_free(relation->rd_keyattr); bms_free(relation->rd_idattr); + bms_free(relation->rd_indirectattr); if (relation->rd_options) pfree(relation->rd_options); if (relation->rd_indextuple) @@ -4288,6 +4290,47 @@ RelationGetFKeyList(Relation relation) } /* + * Return the relation's primary key OID. + * + * Surely this can be made better ... 
+ */ +Oid +RelationGetPrimaryKey(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + Oid pkid = InvalidOid; + + /* Currently we just scan pg_index every time this is called */ + ScanKeyInit(&skey, + Anum_pg_index_indrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(IndexRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, IndexIndrelidIndexId, true, + NULL, 1, &skey); + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + { + Form_pg_index index = (Form_pg_index) GETSTRUCT(htup); + + if (!IndexIsLive(index)) + continue; + if (!index->indisprimary) + continue; + pkid = index->indexrelid; + break; + } + + systable_endscan(indscan); + heap_close(indrel, AccessShareLock); + + return pkid; +} + +/* * RelationGetIndexList -- get a list of OIDs of indexes on this relation * * The index list is created only if someone requests it. We scan pg_index @@ -4301,7 +4344,8 @@ RelationGetFKeyList(Relation relation) * Such indexes are expected to be dropped momentarily, and should not be * touched at all by any caller of this function. * - * The returned list is guaranteed to be sorted in order by OID. This is + * The returned list is guaranteed to be sorted in order by OID, except that + * the primary key is always in front. This is * needed by the executor, since for index types that we obtain exclusive * locks on when updating the index, all backends must lock the indexes in * the same order or we will get deadlocks (see ExecOpenIndices()). 
Any @@ -4377,7 +4421,8 @@ RelationGetIndexList(Relation relation) continue; /* Add index's OID to result list in the proper order */ - result = insert_ordered_oid(result, index->indexrelid); + result = insert_ordered_oid(result, index->indexrelid, + index->indisprimary); /* * indclass cannot be referenced directly through the C struct, @@ -4450,12 +4495,12 @@ RelationGetIndexList(Relation relation) * indexes... */ static List * -insert_ordered_oid(List *list, Oid datum) +insert_ordered_oid(List *list, Oid datum, bool must_be_first) { ListCell *prev; /* Does the datum belong at the front? */ - if (list == NIL || datum < linitial_oid(list)) + if (list == NIL || datum < linitial_oid(list) || must_be_first) return lcons_oid(datum, list); /* No, so find the entry it belongs after */ prev = list_head(list); @@ -4492,7 +4537,7 @@ insert_ordered_oid(List *list, Oid datum) * to ensure that a correct rd_indexattr set has been cached before first * calling RelationSetIndexList; else a subsequent inquiry might cause a * wrong rd_indexattr set to get computed and cached. Likewise, we do not - * touch rd_keyattr or rd_idattr. + * touch rd_hotattr, rd_keyattr, rd_indirectattr or rd_idattr. */ void RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex) @@ -4703,9 +4748,7 @@ RelationGetIndexPredicate(Relation relation) * simple index keys, but attributes used in expressions and partial-index * predicates.) * - * Depending on attrKind, a bitmap covering the attnums for all index columns, - * for all potential foreign key columns, or for all columns in the configured - * replica identity index is returned. + * A bitmap covering attnums is returned, depending on attrKind. * * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that * we can include system attributes (e.g., OID) in the bitmap representation. 
@@ -4721,8 +4764,10 @@ Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) { Bitmapset *indexattrs; /* indexed columns */ + Bitmapset *hotattrs; /* HOT-index-relevant columns */ Bitmapset *uindexattrs; /* columns in unique indexes */ Bitmapset *idindexattrs; /* columns in the replica identity */ + Bitmapset *indirectattrs; /* columns in indirect indexes */ List *indexoidlist; Oid relreplindex; ListCell *l; @@ -4737,8 +4782,12 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return bms_copy(relation->rd_indexattr); case INDEX_ATTR_BITMAP_KEY: return bms_copy(relation->rd_keyattr); + case INDEX_ATTR_BITMAP_HOT: + return bms_copy(relation->rd_hotattr); case INDEX_ATTR_BITMAP_IDENTITY_KEY: return bms_copy(relation->rd_idattr); + case INDEX_ATTR_BITMAP_INDIRECT_INDEXES: + return bms_copy(relation->rd_indirectattr); default: elog(ERROR, "unknown attrKind %u", attrKind); } @@ -4767,7 +4816,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relreplindex = relation->rd_replidindex; /* - * For each index, add referenced attributes to indexattrs. + * For each index, add referenced attributes to the attribute bitmaps. * * Note: we consider all indexes returned by RelationGetIndexList, even if * they are not indisready or indisvalid. This is important because an @@ -4777,8 +4826,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) * won't be returned at all by RelationGetIndexList. 
*/ indexattrs = NULL; + hotattrs = NULL; uindexattrs = NULL; idindexattrs = NULL; + indirectattrs = NULL; foreach(l, indexoidlist) { Oid indexOid = lfirst_oid(l); @@ -4787,6 +4838,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) int i; bool isKey; /* candidate key */ bool isIDKey; /* replica identity index */ + bool isIndirect; /* an indirect index */ + Bitmapset *exprattrs = NULL; indexDesc = index_open(indexOid, AccessShareLock); @@ -4801,6 +4854,9 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) /* Is this index the configured (or default) replica identity? */ isIDKey = (indexOid == relreplindex); + /* Is this an indirect index? */ + isIndirect = indexInfo->ii_IsIndirect; + /* Collect simple attribute references */ for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) { @@ -4809,7 +4865,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) if (attrnum != 0) { indexattrs = bms_add_member(indexattrs, - attrnum - FirstLowInvalidHeapAttributeNumber); + attrnum - FirstLowInvalidHeapAttributeNumber); if (isKey) uindexattrs = bms_add_member(uindexattrs, @@ -4818,14 +4874,31 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) if (isIDKey) idindexattrs = bms_add_member(idindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); + + if (isIndirect) + indirectattrs = bms_add_member(indirectattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + else + hotattrs = bms_add_member(hotattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); } } /* Collect all attributes used in expressions, too */ - pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs); + pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &exprattrs); + indexattrs = bms_add_members(indexattrs, exprattrs); + /* these don't affect KEY (unique) indexes */ + hotattrs = bms_add_members(hotattrs, exprattrs); + indirectattrs = bms_add_members(indirectattrs, exprattrs); + 
bms_free(exprattrs); /* Collect all attributes in the index predicate, too */ - pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs); + exprattrs = NULL; + pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &exprattrs); + indexattrs = bms_add_members(indexattrs, exprattrs); + hotattrs = bms_add_members(hotattrs, exprattrs); + /* these don't affect KEY or indirect indexes */ + bms_free(exprattrs); index_close(indexDesc, AccessShareLock); } @@ -4835,10 +4908,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) /* Don't leak the old values of these bitmaps, if any */ bms_free(relation->rd_indexattr); relation->rd_indexattr = NULL; + bms_free(relation->rd_hotattr); + relation->rd_hotattr = NULL; bms_free(relation->rd_keyattr); relation->rd_keyattr = NULL; bms_free(relation->rd_idattr); relation->rd_idattr = NULL; + bms_free(relation->rd_indirectattr); + relation->rd_indirectattr = NULL; /* * Now save copies of the bitmaps in the relcache entry. We intentionally @@ -4851,6 +4928,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relation->rd_keyattr = bms_copy(uindexattrs); relation->rd_idattr = bms_copy(idindexattrs); relation->rd_indexattr = bms_copy(indexattrs); + relation->rd_hotattr = bms_copy(hotattrs); + relation->rd_indirectattr = bms_copy(indirectattrs); MemoryContextSwitchTo(oldcxt); /* We return our original working copy for caller to play with */ @@ -4858,10 +4937,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) { case INDEX_ATTR_BITMAP_ALL: return indexattrs; + case INDEX_ATTR_BITMAP_HOT: + return hotattrs; case INDEX_ATTR_BITMAP_KEY: return uindexattrs; case INDEX_ATTR_BITMAP_IDENTITY_KEY: return idindexattrs; + case INDEX_ATTR_BITMAP_INDIRECT_INDEXES: + return indirectattrs; default: elog(ERROR, "unknown attrKind %u", attrKind); return NULL; @@ -5409,8 +5492,10 @@ load_relcache_init_file(bool shared) rel->rd_oidindex = InvalidOid; rel->rd_replidindex = 
InvalidOid; rel->rd_indexattr = NULL; + rel->rd_hotattr = NULL; rel->rd_keyattr = NULL; rel->rd_idattr = NULL; + rel->rd_indirectattr = NULL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 1036cca..9f28c7d 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -175,6 +175,8 @@ typedef struct IndexAmRoutine bool amclusterable; /* does AM handle predicate locks? */ bool ampredlocks; + /* does AM support indirect indexes? */ + bool amcanindirect; /* type of data stored in index, or InvalidOid if variable */ Oid amkeytype; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 81907d5..ff240f3 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -102,13 +102,17 @@ typedef struct SysScanDescData *SysScanDesc; * call is made with UNIQUE_CHECK_EXISTING. The tuple is already in the * index in this case, so it should not be inserted again. Rather, just * check for conflicting live tuples (possibly blocking). + * + * UNIQUE_CHECK_INSERT_SINGLETON only inserts if there isn't already an + * index tuple. This is supported for indirect indexes. 
*/ typedef enum IndexUniqueCheck { UNIQUE_CHECK_NO, /* Don't do any uniqueness checking */ UNIQUE_CHECK_YES, /* Enforce uniqueness at insertion time */ UNIQUE_CHECK_PARTIAL, /* Test uniqueness, but no error */ - UNIQUE_CHECK_EXISTING /* Check if existing tuple is unique */ + UNIQUE_CHECK_EXISTING, /* Check if existing tuple is unique */ + UNIQUE_CHECK_INSERT_SINGLETON /* Only insert if value doesn't exist */ } IndexUniqueCheck; @@ -127,6 +131,7 @@ extern void index_close(Relation relation, LOCKMODE lockmode); extern bool index_insert(Relation indexRelation, Datum *values, bool *isnull, + Datum *pkeyValues, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique); @@ -146,8 +151,12 @@ extern void index_markpos(IndexScanDesc scan); extern void index_restrpos(IndexScanDesc scan); extern ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction); -extern HeapTuple index_fetch_heap(IndexScanDesc scan); -extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction); +extern void index_getnext_pkey(IndexScanDesc scan, ScanDirection direction, + Datum *pkvals, bool *isDone); +extern HeapTuple index_fetch_heap(IndexScanDesc scan, bool *anyfound); + +extern HeapTuple index_getnext(IndexScanDesc scan, ScanDirection direction, + bool *anyfound); extern int64 index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap); extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 0d12bbb..99af368 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -16,6 +16,7 @@ #include "access/sdir.h" #include "access/skey.h" +#include "nodes/bitmapset.h" #include "nodes/lockoptions.h" #include "nodes/primnodes.h" #include "storage/bufpage.h" @@ -160,7 +161,8 @@ extern void heap_abort_speculative(Relation relation, HeapTuple tuple); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, 
Snapshot crosscheck, bool wait, - HeapUpdateFailureData *hufd, LockTupleMode *lockmode); + HeapUpdateFailureData *hufd, Bitmapset **unchanged_attrs, + LockTupleMode *lockmode); extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_update, diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 8fb1f6d..9b4a886 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -501,7 +501,7 @@ do { \ #define HeapTupleHeaderIsHeapOnly(tup) \ ( \ - (tup)->t_infomask2 & HEAP_ONLY_TUPLE \ + ((tup)->t_infomask2 & HEAP_ONLY_TUPLE) != 0 \ ) #define HeapTupleHeaderSetHeapOnly(tup) \ diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 37e6ef3..0a70e08 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -37,6 +37,21 @@ typedef enum INDEX_DROP_SET_DEAD } IndexStateFlagsAction; +static inline void +FAKE_CTID_FROM_PKVALUES(ItemPointer iptr, int16 pkNumKeys, Datum *pkvalues) +{ + /* We should support more than one column in the PK */ + Assert(pkNumKeys == 1); + + /* + * Because some overzealous checks, we can't have all-zeroes offnum + * nor blknum, so we always set their lowest-order bit to 1; make sure + * to ignore that bit when reading back the value from the TID. 
+ */ + iptr->ip_posid = ((pkvalues[0] & 0x7fff) << 1) | 1; + iptr->ip_blkid.bi_hi = (pkvalues[0] >> 15) & 0xffff; + iptr->ip_blkid.bi_lo = ((pkvalues[0] >> 31) & 0xffff) | 1; +} extern void index_check_primary_key(Relation heapRel, IndexInfo *indexInfo, diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index ee97c5d..144d204 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -34,6 +34,7 @@ CATALOG(pg_index,2610) BKI_WITHOUT_OIDS BKI_SCHEMA_MACRO Oid indrelid; /* OID of the relation it indexes */ int16 indnatts; /* number of columns in index */ bool indisunique; /* is this a unique index? */ + bool indisindirect; /* is this an indirect index? */ bool indisprimary; /* is this index for primary key? */ bool indisexclusion; /* is this index for exclusion constraint? */ bool indimmediate; /* is uniqueness enforced immediately? */ @@ -70,26 +71,27 @@ typedef FormData_pg_index *Form_pg_index; * compiler constants for pg_index * ---------------- */ -#define Natts_pg_index 19 +#define Natts_pg_index 20 #define Anum_pg_index_indexrelid 1 #define Anum_pg_index_indrelid 2 #define Anum_pg_index_indnatts 3 #define Anum_pg_index_indisunique 4 -#define Anum_pg_index_indisprimary 5 -#define Anum_pg_index_indisexclusion 6 -#define Anum_pg_index_indimmediate 7 -#define Anum_pg_index_indisclustered 8 -#define Anum_pg_index_indisvalid 9 -#define Anum_pg_index_indcheckxmin 10 -#define Anum_pg_index_indisready 11 -#define Anum_pg_index_indislive 12 -#define Anum_pg_index_indisreplident 13 -#define Anum_pg_index_indkey 14 -#define Anum_pg_index_indcollation 15 -#define Anum_pg_index_indclass 16 -#define Anum_pg_index_indoption 17 -#define Anum_pg_index_indexprs 18 -#define Anum_pg_index_indpred 19 +#define Anum_pg_index_indisindirect 5 +#define Anum_pg_index_indisprimary 6 +#define Anum_pg_index_indisexclusion 7 +#define Anum_pg_index_indimmediate 8 +#define Anum_pg_index_indisclustered 9 +#define Anum_pg_index_indisvalid 10 
+#define Anum_pg_index_indcheckxmin 11 +#define Anum_pg_index_indisready 12 +#define Anum_pg_index_indislive 13 +#define Anum_pg_index_indisreplident 14 +#define Anum_pg_index_indkey 15 +#define Anum_pg_index_indcollation 16 +#define Anum_pg_index_indclass 17 +#define Anum_pg_index_indoption 18 +#define Anum_pg_index_indexprs 19 +#define Anum_pg_index_indpred 20 /* * Index AMs that support ordered scans must support these two indoption diff --git a/src/include/executor/execdebug.h b/src/include/executor/execdebug.h index 950a0bc..e662a69 100644 --- a/src/include/executor/execdebug.h +++ b/src/include/executor/execdebug.h @@ -17,6 +17,7 @@ #ifndef EXECDEBUG_H #define EXECDEBUG_H +#include "access/printtup.h" #include "executor/executor.h" #include "nodes/print.h" diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index b74fa5e..14a2740 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -377,8 +377,9 @@ extern void UnregisterExprContextCallback(ExprContext *econtext, extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid, - EState *estate, bool noDupErr, bool *specConflict, - List *arbiterIndexes); + EState *estate, bool isHOTupdate, bool noDupErr, + bool *specConflict, List *arbiterIndexes, + Bitmapset *unchangedAttrs); extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate, ItemPointer conflictTid, List *arbiterIndexes); extern void check_exclusion_constraint(Relation heap, Relation index, diff --git a/src/include/executor/nodeIndirectIndexscan.h b/src/include/executor/nodeIndirectIndexscan.h new file mode 100644 index 0000000..46eea32 --- /dev/null +++ b/src/include/executor/nodeIndirectIndexscan.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * nodeIndirectIndexscan.h + * 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeIndirectIndexscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEINDIRECTINDEXSCAN_H +#define NODEINDIRECTINDEXSCAN_H + +#include "nodes/execnodes.h" + +extern IndirectIndexScanState *ExecInitIndirectIndexScan(IndirectIndexScan *node, EState *estate, int eflags); +extern TupleTableSlot *ExecIndirectIndexScan(IndirectIndexScanState *node); +extern void ExecEndIndirectIndexScan(IndirectIndexScanState *node); +extern void ExecIndirectIndexMarkPos(IndirectIndexScanState *node); +extern void ExecIndirectIndexRestrPos(IndirectIndexScanState *node); +extern void ExecReScanIndirectIndexScan(IndirectIndexScanState *node); + +#endif /* NODEINDIRECTINDEXSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d43ec56..09cc997 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -49,6 +49,7 @@ * UniqueProcs * UniqueStrats * Unique is it a unique index? + * IsIndirect is it an indirect index? * ReadyForInserts is it valid for inserts? * Concurrent are we doing a concurrent index build? * BrokenHotChain did we detect any broken HOT chains? 
@@ -73,6 +74,7 @@ typedef struct IndexInfo Oid *ii_UniqueProcs; /* array with one entry per column */ uint16 *ii_UniqueStrats; /* array with one entry per column */ bool ii_Unique; + bool ii_IsIndirect; bool ii_ReadyForInserts; bool ii_Concurrent; bool ii_BrokenHotChain; @@ -307,6 +309,7 @@ typedef struct JunkFilter * NumIndices # of indices existing on result relation * IndexRelationDescs array of relation descriptors for indices * IndexRelationInfo array of key/attr info for indices + * PrimaryKeyIndex array index for the primary key * TrigDesc triggers to be fired, if any * TrigFunctions cached lookup info for trigger functions * TrigWhenExprs array of trigger WHEN expr states @@ -314,6 +317,7 @@ typedef struct JunkFilter * FdwRoutine FDW callback functions, if foreign table * FdwState available to save private state of FDW * usesFdwDirectModify true when modifying foreign table directly + * hasIndirectIndexes true when there are any indirect indexes * WithCheckOptions list of WithCheckOption's to be checked * WithCheckOptionExprs list of WithCheckOption expr states * ConstraintExprs array of constraint-checking expr states @@ -333,6 +337,7 @@ typedef struct ResultRelInfo int ri_NumIndices; RelationPtr ri_IndexRelationDescs; IndexInfo **ri_IndexRelationInfo; + Index ri_PrimaryKeyIndex; TriggerDesc *ri_TrigDesc; FmgrInfo *ri_TrigFunctions; List **ri_TrigWhenExprs; @@ -340,6 +345,7 @@ typedef struct ResultRelInfo struct FdwRoutine *ri_FdwRoutine; void *ri_FdwState; bool ri_usesFdwDirectModify; + bool ri_hasIndirectIndexes; List *ri_WithCheckOptions; List *ri_WithCheckOptionExprs; List **ri_ConstraintExprs; @@ -1382,6 +1388,47 @@ typedef struct IndexScanState } IndexScanState; /* ---------------- + * IndirectIndexScanState information + * + * indexqualorig execution state for indexqualorig expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * IndRelationDesc indirect index relation descriptor + * IndScanDesc indirect index 
scan descriptor + * PKRelationDesc primary key index relation descriptor + * PKScanDesc primary key index scan descriptor + * HaveIndirectTuple a tuple has been read from the index + * IndirectTupleIsLive has the read tuple been proved live? + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * ---------------- + */ +typedef struct IndirectIndexScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *indexqualorig; + ScanKey iiss_ScanKeys; + int iiss_NumScanKeys; + Relation iiss_IndRelationDesc; + IndexScanDesc iiss_IndScanDesc; + Relation iiss_PKRelationDesc; + IndexScanDesc iiss_PKScanDesc; + bool iiss_HaveIndirectTuple; + bool iiss_IndirectTupleIsLive; + ScanKey iiss_OrderByKeys; + int iiss_NumOrderByKeys; + IndexRuntimeKeyInfo *iiss_RuntimeKeys; + int iiss_NumRuntimeKeys; + bool iiss_RuntimeKeysReady; + Oid *iiss_EqualFuncs; + ExprContext *iiss_RuntimeContext; +} IndirectIndexScanState; + +/* ---------------- * IndexOnlyScanState information * * indexqual execution state for indexqual expressions diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c514d3f..b57f235 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -53,6 +53,7 @@ typedef enum NodeTag T_SeqScan, T_SampleScan, T_IndexScan, + T_IndirectIndexScan, T_IndexOnlyScan, T_BitmapIndexScan, T_BitmapHeapScan, @@ -101,6 +102,7 @@ typedef enum NodeTag T_SeqScanState, T_SampleScanState, T_IndexScanState, + T_IndirectIndexScanState, T_IndexOnlyScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index fc532fb..40e1547 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ 
-2558,6 +2558,7 @@ typedef struct IndexStmt Oid indexOid; /* OID of an existing index, if any */ Oid oldNode; /* relfilenode of existing storage, if any */ bool unique; /* is index unique? */ + bool isindirect; /* is index indirect? */ bool primary; /* is index a primary key? */ bool isconstraint; /* is it for a pkey/unique constraint? */ bool deferrable; /* is the constraint DEFERRABLE? */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e2fbc7d..c1e6a95 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -353,6 +353,27 @@ typedef struct IndexScan } IndexScan; /* ---------------- + * indirect index scan node + * + * IndirectIndexScan is very similar to IndexScan, but it specifies a + * scan of an indirect index. In addition to the fields in IndexScan, + * it has the OID of the primary key that this index references. + * ---------------- + */ +typedef struct IndirectIndexScan +{ + Scan scan; + Oid indexid; /* OID of indirect index */ + Oid pkindexid; /* OID of the primary key it references */ + List *indexqual; /* list of index quals */ + List *indexqualorig; /* the same in original form */ + List *indexorderby; /* list of index ORDER BY exprs */ + List *indexorderbyorig; /* the same in original form */ + List *indexorderbyops; /* OIDs of sort ops for ORDER BY exprs */ + ScanDirection indexorderdir; /* forward or backward or don't care */ +} IndirectIndexScan; + +/* ---------------- * index-only scan node * * IndexOnlyScan is very similar to IndexScan, but it specifies an diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 3a1255a..82cf1d6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -581,6 +581,7 @@ typedef struct IndexOptInfo Oid indexoid; /* OID of the index relation */ Oid reltablespace; /* tablespace of index (not table) */ RelOptInfo *rel; /* back-link to index's table */ + struct IndexOptInfo *pkindex; /* link to referenced index, if 
indirect */ /* index-size statistics (from pg_class and elsewhere) */ BlockNumber pages; /* number of disk pages in index */ @@ -613,6 +614,7 @@ typedef struct IndexOptInfo bool predOK; /* true if index predicate matches query */ bool unique; /* true if a unique index */ bool immediate; /* is uniqueness enforced immediately? */ + bool indirect; /* true if index is indirect */ bool hypothetical; /* true if index doesn't really exist */ /* Remaining fields are copied from the index AM's API struct: */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 581ff6e..dbcc892 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -196,6 +196,7 @@ PG_KEYWORD("including", INCLUDING, UNRESERVED_KEYWORD) PG_KEYWORD("increment", INCREMENT, UNRESERVED_KEYWORD) PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD) PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD) +PG_KEYWORD("indirect", INDIRECT, UNRESERVED_KEYWORD) PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD) PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD) PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index cd7ea1d..b046f12 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -135,9 +135,11 @@ typedef struct RelationData Oid rd_replidindex; /* OID of replica identity index, if any */ /* data managed by RelationGetIndexAttrBitmap: */ - Bitmapset *rd_indexattr; /* identifies columns used in indexes */ + Bitmapset *rd_indexattr; /* identifies columns used in any index */ + Bitmapset *rd_hotattr; /* columns used in HOT-relevant indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ Bitmapset *rd_idattr; /* included in replica identity index */ + Bitmapset *rd_indirectattr; /* cols part of any indirect index */ /* * rd_options is set whenever rd_rel is loaded into the relcache entry. 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 6ea7dd2..0e5df4c 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,7 @@ extern void RelationClose(Relation relation); */ extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); +extern Oid RelationGetPrimaryKey(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation); @@ -48,7 +49,9 @@ typedef enum IndexAttrBitmapKind { INDEX_ATTR_BITMAP_ALL, INDEX_ATTR_BITMAP_KEY, - INDEX_ATTR_BITMAP_IDENTITY_KEY + INDEX_ATTR_BITMAP_HOT, + INDEX_ATTR_BITMAP_IDENTITY_KEY, + INDEX_ATTR_BITMAP_INDIRECT_INDEXES } IndexAttrBitmapKind; extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation,