*** a/contrib/pageinspect/Makefile
--- b/contrib/pageinspect/Makefile
***************
*** 1,7 ****
  # contrib/pageinspect/Makefile
  
  MODULE_big	= pageinspect
! OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
  
  EXTENSION = pageinspect
  DATA = pageinspect--1.2.sql pageinspect--1.0--1.1.sql \
--- 1,7 ----
  # contrib/pageinspect/Makefile
  
  MODULE_big	= pageinspect
! OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o mmfuncs.o
  
  EXTENSION = pageinspect
  DATA = pageinspect--1.2.sql pageinspect--1.0--1.1.sql \
*** /dev/null
--- b/contrib/pageinspect/mmfuncs.c
***************
*** 0 ****
--- 1,418 ----
+ /*
+  * mmfuncs.c
+  * 		Functions to investigate MinMax indexes
+  *
+  * Copyright (c) 2013, PostgreSQL Global Development Group
+  *
+  * IDENTIFICATION
+  * 		contrib/pageinspect/mmfuncs.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
+ #include "catalog/index.h"
+ #include "catalog/pg_type.h"
+ #include "funcapi.h"
+ #include "utils/array.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+ #include "utils/rel.h"
+ #include "miscadmin.h"
+ 
+ Datum minmax_page_type(PG_FUNCTION_ARGS);
+ Datum minmax_page_items(PG_FUNCTION_ARGS);
+ Datum minmax_metapage_info(PG_FUNCTION_ARGS);
+ Datum minmax_revmap_array_data(PG_FUNCTION_ARGS);
+ Datum minmax_revmap_data(PG_FUNCTION_ARGS);
+ 
+ PG_FUNCTION_INFO_V1(minmax_page_type);
+ PG_FUNCTION_INFO_V1(minmax_page_items);
+ PG_FUNCTION_INFO_V1(minmax_metapage_info);
+ PG_FUNCTION_INFO_V1(minmax_revmap_array_data);
+ PG_FUNCTION_INFO_V1(minmax_revmap_data);
+ 
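+ /*
+  * Cross-call state for minmax_page_items: the page being scanned, the
+  * current position within it, and the text-output function for each
+  * indexed column.
+  */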
+ typedef struct mm_page_state
+ {
+ 	TupleDesc	tupdesc;
+ 	Page		page;
+ 	OffsetNumber offset;
+ 	bool		unusedItem;
+ 	bool		done;
+ 	AttrNumber	attno;
+ 	DeformedMMTuple *dtup;
+ 	FmgrInfo	outputfn[FLEXIBLE_ARRAY_MEMBER];
+ } mm_page_state;
+ 
+ 
+ static Page verify_minmax_page(bytea *raw_page, uint16 type,
+ 				 const char *strtype);
+ 
+ Datum
+ minmax_page_type(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page = VARDATA(raw_page);
+ 	MinmaxSpecialSpace *special;
+ 	char *type;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	switch (special->type)
+ 	{
+ 		case MINMAX_PAGETYPE_META:
+ 			type = "meta";
+ 			break;
+ 		case MINMAX_PAGETYPE_REVMAP_ARRAY:
+ 			type = "revmap array";
+ 			break;
+ 		case MINMAX_PAGETYPE_REVMAP:
+ 			type = "revmap";
+ 			break;
+ 		case MINMAX_PAGETYPE_REGULAR:
+ 			type = "regular";
+ 			break;
+ 		default:
+ 			type = psprintf("unknown (%02x)", special->type);
+ 			break;
+ 	}
+ 
+ 	PG_RETURN_TEXT_P(cstring_to_text(type));
+ }
+ 
+ /*
+  * Verify that the given bytea contains a minmax page of the indicated page
+  * type, or die in the attempt.  A pointer to the page is returned.
+  */
+ static Page
+ verify_minmax_page(bytea *raw_page, uint16 type, const char *strtype)
+ {
+ 	Page	page;
+ 	int		raw_page_size;
+ 	MinmaxSpecialSpace *special;
+ 
+ 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+ 
+ 	if (raw_page_size < SizeOfPageHeaderData)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("input page too small"),
+ 				 errdetail("Expected size %d, got %d.", BLCKSZ, raw_page_size)));
+ 
+ 	page = VARDATA(raw_page);
+ 
+ 	/* verify the special space says this page is what we want */
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	if (special->type != type)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("page is not a Minmax page of type \"%s\"", strtype),
+ 				 errdetail("Expected special type %08x, got %08x.",
+ 						   type, special->type)));
+ 
+ 	return page;
+ }
+ 
+ 
+ /*
+  * Extract all item values from a minmax index page
+  *
+  * Usage: SELECT * FROM minmax_page_items(get_raw_page('idx', 1), 'idx'::regclass);
+  */
+ Datum
+ minmax_page_items(PG_FUNCTION_ARGS)
+ {
+ 	mm_page_state *state;
+ 	FuncCallContext *fctx;
+ 
+ 	if (!superuser())
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ 				 (errmsg("must be superuser to use raw page functions"))));
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 		Oid			indexRelid = PG_GETARG_OID(1);
+ 		Page		page;
+ 		TupleDesc	tupdesc;
+ 		MemoryContext mctx;
+ 		Relation	indexRel;
+ 		AttrNumber	attno;
+ 
+ 		/* minimally verify the page we got */
+ 		page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_REGULAR, "regular");
+ 
+ 		/* create a function context for cross-call persistence */
+ 		fctx = SRF_FIRSTCALL_INIT();
+ 
+ 		/* switch to memory context appropriate for multiple function calls */
+ 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+ 
+ 		/* Build a tuple descriptor for our result type */
+ 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 			elog(ERROR, "return type must be a row type");
+ 
+ 		indexRel = index_open(indexRelid, AccessShareLock);
+ 
+ 		state = palloc(offsetof(mm_page_state, outputfn) +
+ 					   sizeof(FmgrInfo) * RelationGetDescr(indexRel)->natts);
+ 
+ 		state->tupdesc = CreateTupleDescCopy(RelationGetDescr(indexRel));
+ 		state->page = page;
+ 		state->offset = FirstOffsetNumber;
+ 		state->unusedItem = false;
+ 		state->done = false;
+ 		state->dtup = NULL;
+ 
+ 		index_close(indexRel, AccessShareLock);
+ 
+ 		for (attno = 1; attno <= state->tupdesc->natts; attno++)
+ 		{
+ 			Oid		output;
+ 			bool	isVarlena;
+ 
+ 			getTypeOutputInfo(state->tupdesc->attrs[attno - 1]->atttypid,
+ 							  &output, &isVarlena);
+ 			fmgr_info(output, &state->outputfn[attno - 1]);
+ 		}
+ 
+ 		fctx->user_fctx = state;
+ 		fctx->tuple_desc = BlessTupleDesc(tupdesc);
+ 
+ 		MemoryContextSwitchTo(mctx);
+ 	}
+ 
+ 	fctx = SRF_PERCALL_SETUP();
+ 	state = fctx->user_fctx;
+ 
+ 	if (!state->done)
+ 	{
+ 		HeapTuple	result;
+ 		Datum		values[6];
+ 		bool		nulls[6];
+ 
+ 		/*
+ 		 * This loop is called once for every attribute of every tuple in the
+ 		 * page.  At the start of a tuple, we get a NULL dtup; that's our
+ 		 * signal for obtaining and decoding the next one.  If that's not the
+ 		 * case, we output the next attribute.
+ 		 */
+ 		if (state->dtup == NULL)
+ 		{
+ 			MMTuple	   *tup;
+ 			MemoryContext mctx;
+ 			ItemId		itemId;
+ 
+ 			/* deformed tuple must live across calls */
+ 			mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+ 
+ 			/* verify item status: if there's no data, we can't decode */
+ 			itemId = PageGetItemId(state->page, state->offset);
+ 			if (ItemIdIsUsed(itemId))
+ 			{
+ 				tup = (MMTuple *) PageGetItem(state->page, itemId);
+ 				state->dtup = minmax_deform_tuple(state->tupdesc, tup);
+ 				state->attno = 1;
+ 				state->unusedItem = false;
+ 			}
+ 			else
+ 				state->unusedItem = true;
+ 
+ 			MemoryContextSwitchTo(mctx);
+ 		}
+ 		else
+ 			state->attno++;
+ 
+ 		MemSet(nulls, 0, sizeof(nulls));
+ 
+ 		if (state->unusedItem)
+ 		{
+ 			values[0] = UInt16GetDatum(state->offset);
+ 			nulls[1] = true;
+ 			nulls[2] = true;
+ 			nulls[3] = true;
+ 			nulls[4] = true;
+ 			nulls[5] = true;
+ 		}
+ 		else
+ 		{
+ 			int		att = state->attno - 1;
+ 
+ 			values[0] = UInt16GetDatum(state->offset);
+ 			values[1] = UInt16GetDatum(state->attno);
+ 			values[2] = BoolGetDatum(state->dtup->values[att].allnulls);
+ 			values[3] = BoolGetDatum(state->dtup->values[att].hasnulls);
+ 			if (!state->dtup->values[att].allnulls)
+ 			{
+ 				FmgrInfo   *outputfn = &state->outputfn[att];
+ 				MMValues   *mmvalues = &state->dtup->values[att];
+ 
+ 				values[4] = CStringGetTextDatum(OutputFunctionCall(outputfn,
+ 																   mmvalues->min));
+ 				values[5] = CStringGetTextDatum(OutputFunctionCall(outputfn,
+ 																   mmvalues->max));
+ 			}
+ 			else
+ 			{
+ 				nulls[4] = true;
+ 				nulls[5] = true;
+ 			}
+ 		}
+ 
+ 		result = heap_form_tuple(fctx->tuple_desc, values, nulls);
+ 
+ 		/*
+ 		 * If the item was unused, jump straight to the next one; otherwise,
+ 		 * the only cleanup needed here is to set our signal to go to the next
+ 		 * tuple in the following iteration, by freeing the current one.
+ 		 */
+ 		if (state->unusedItem)
+ 			state->offset = OffsetNumberNext(state->offset);
+ 		else if (state->attno >= state->tupdesc->natts)
+ 		{
+ 			pfree(state->dtup);
+ 			state->dtup = NULL;
+ 			state->offset = OffsetNumberNext(state->offset);
+ 		}
+ 
+ 		/*
+ 		 * If we're beyond the end of the page, set flag to end the function in
+ 		 * the following iteration.
+ 		 */
+ 		if (state->offset > PageGetMaxOffsetNumber(state->page))
+ 			state->done = true;
+ 
+ 		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(result));
+ 	}
+ 
+ 	SRF_RETURN_DONE(fctx);
+ }
+ 
+ Datum
+ minmax_metapage_info(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page;
+ 	MinmaxMetaPageData *meta;
+ 	TupleDesc	tupdesc;
+ 	Datum		values[2];
+ 	bool		nulls[2];
+ 	ArrayBuildState *astate = NULL;
+ 	HeapTuple	htup;
+ 	int			i;
+ 
+ 	page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_META, "metapage");
+ 
+ 	/* Build a tuple descriptor for our result type */
+ 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 		elog(ERROR, "return type must be a row type");
+ 	tupdesc = BlessTupleDesc(tupdesc);
+ 
+ 	/* Extract values from the metapage */
+ 	meta = (MinmaxMetaPageData *) PageGetContents(page);
+ 	MemSet(nulls, 0, sizeof(nulls));
+ 	values[0] = Int32GetDatum(meta->minmaxVersion);
+ 
+ 	/* Extract (possibly empty) list of revmap array page numbers. */
+ 	for (i = 0; i < MAX_REVMAP_ARRAYPAGES; i++)
+ 	{
+ 		BlockNumber	blkno;
+ 
+ 		blkno = meta->revmapArrayPages[i];
+ 		if (blkno == InvalidBlockNumber)
+ 			break;	/* XXX or continue? */
+ 		astate = accumArrayResult(astate, Int64GetDatum((int64) blkno),
+ 								  false, INT8OID, CurrentMemoryContext);
+ 	}
+ 	if (astate == NULL)
+ 		nulls[1] = true;
+ 	else
+ 		values[1] = makeArrayResult(astate, CurrentMemoryContext);
+ 
+ 	htup = heap_form_tuple(tupdesc, values, nulls);
+ 
+ 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
+ }
+ 
+ /*
+  * Return the BlockNumber array stored in a revmap array page
+  */
+ Datum
+ minmax_revmap_array_data(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page;
+ 	ArrayBuildState *astate = NULL;
+ 	RevmapArrayContents *contents;
+ 	Datum		blkarr;
+ 	int			i;
+ 
+ 	page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_REVMAP_ARRAY,
+ 							  "revmap array");
+ 
+ 	contents = (RevmapArrayContents *) PageGetContents(page);
+ 
+ 	for (i = 0; i < contents->rma_nblocks; i++)
+ 		astate = accumArrayResult(astate,
+ 								  Int64GetDatum((int64) contents->rma_blocks[i]),
+ 								  false, INT8OID, CurrentMemoryContext);
+ 	Assert(astate != NULL);
+ 
+ 	blkarr = makeArrayResult(astate, CurrentMemoryContext);
+ 	PG_RETURN_DATUM(blkarr);
+ }
+ 
+ /*
+  * Return the TID array stored in a minmax revmap page
+  */
+ Datum
+ minmax_revmap_data(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page;
+ 	RevmapContents *contents;
+ 	TupleDesc	tupdesc;
+ 	Datum		values[2];
+ 	bool		nulls[2];
+ 	HeapTuple	htup;
+ 	ArrayBuildState *astate = NULL;
+ 	int			i;
+ 
+ 	page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_REVMAP, "revmap");
+ 
+ 	/* Build a tuple descriptor for our result type */
+ 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 		elog(ERROR, "return type must be a row type");
+ 	tupdesc = BlessTupleDesc(tupdesc);
+ 
+ 	/* Extract values from the revmap page */
+ 	contents = (RevmapContents *) PageGetContents(page);
+ 	MemSet(nulls, 0, sizeof(nulls));
+ 	values[0] = Int64GetDatum((int64) contents->rmr_logblk);
+ 
+ 	/* Extract (possibly empty) list of TIDs in this page. */
+ 	for (i = 0; i < REGULAR_REVMAP_PAGE_MAXITEMS; i++)
+ 	{
+ 		ItemPointer	tid;
+ 
+ 		tid = &contents->rmr_tids[i];
+ 		astate = accumArrayResult(astate,
+ 								  PointerGetDatum(tid),
+ 								  false, TIDOID, CurrentMemoryContext);
+ 	}
+ 	if (astate == NULL)
+ 		nulls[1] = true;
+ 	else
+ 		values[1] = makeArrayResult(astate, CurrentMemoryContext);
+ 
+ 	htup = heap_form_tuple(tupdesc, values, nulls);
+ 
+ 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
+ }
*** a/contrib/pageinspect/pageinspect--1.2.sql
--- b/contrib/pageinspect/pageinspect--1.2.sql
***************
*** 99,104 **** AS 'MODULE_PATHNAME', 'bt_page_items'
--- 99,148 ----
  LANGUAGE C STRICT;
  
  --
+ -- minmax_page_type()
+ --
+ CREATE FUNCTION minmax_page_type(IN page bytea)
+ RETURNS text
+ AS 'MODULE_PATHNAME', 'minmax_page_type'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_metapage_info()
+ --
+ CREATE FUNCTION minmax_metapage_info(IN page bytea,
+ 	OUT version integer, OUT revmap_array_pages BIGINT[])
+ AS 'MODULE_PATHNAME', 'minmax_metapage_info'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_page_items()
+ --
+ CREATE FUNCTION minmax_page_items(IN page bytea, IN index_oid oid,
+ 	OUT itemoffset int,
+ 	OUT attnum int,
+ 	OUT allnulls bool,
+ 	OUT hasnulls bool,
+ 	OUT min text,
+ 	OUT max text)
+ RETURNS SETOF record
+ AS 'MODULE_PATHNAME', 'minmax_page_items'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_revmap_array_data()
+ --
+ CREATE FUNCTION minmax_revmap_array_data(IN page bytea,
+ 	OUT revmap_pages BIGINT[])
+ AS 'MODULE_PATHNAME', 'minmax_revmap_array_data'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_revmap_data()
+ --
+ CREATE FUNCTION minmax_revmap_data(IN page bytea,
+ 	OUT logblk BIGINT, OUT pages tid[])
+ AS 'MODULE_PATHNAME', 'minmax_revmap_data'
+ LANGUAGE C STRICT;
+ 
+ --
  -- fsm_page_contents()
  --
  CREATE FUNCTION fsm_page_contents(IN page bytea)
*** a/contrib/pg_xlogdump/rmgrdesc.c
--- b/contrib/pg_xlogdump/rmgrdesc.c
***************
*** 13,18 ****
--- 13,19 ----
  #include "access/gist_private.h"
  #include "access/hash.h"
  #include "access/heapam_xlog.h"
+ #include "access/minmax_xlog.h"
  #include "access/multixact.h"
  #include "access/nbtree.h"
  #include "access/rmgr.h"
*** /dev/null
--- b/minmax-proposal
***************
*** 0 ****
--- 1,300 ----
+ Minmax Range Indexes
+ ====================
+ 
+ Minmax indexes are a new access method intended to enable very fast scanning of
+ extremely large tables.
+ 
+ The essential idea of a minmax index is to keep track of the min() and max()
+ values in consecutive groups of heap pages (page ranges).  These values can be
+ used by constraint exclusion to avoid scanning such pages, depending on query
+ quals.
+ 
+ The main drawback of this is having to update the stored min/max values of each
+ page range as tuples are inserted into them.
+ 
+ Other database systems already have this feature. Some examples:
+ 
+ * Oracle Exadata calls this "storage indexes"
+   http://richardfoote.wordpress.com/category/storage-indexes/
+ 
+ * Netezza has "zone maps"
+   http://nztips.com/2010/11/netezza-integer-join-keys/
+ 
+ * Infobright has this automatically within their "data packs"
+   http://www.infobright.org/Blog/Entry/organizing_data_and_more_about_rough_data_contest/
+ 
+ * MonetDB seems to have it
+   http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.108.2662
+   "Cooperative Scans: Dynamic Bandwidth Sharing in a DBMS"
+ 
+ Index creation
+ --------------
+ 
+ To create a minmax index, we use the standard syntax:
+ 
+   CREATE INDEX foo_minmax_idx ON foo USING MINMAX (a, b, e);
+ 
+ Partial indexes are not supported; since the index stores the minimum and
+ maximum values of the involved columns across all the pages in the table, it
+ doesn't make sense to exclude rows by predicate.  Another way to read "partial"
+ here would be an index that only considers some pages in the table instead of
+ all of them; but this would be difficult to implement and manage and, most
+ likely, pointless.
+ 
+ Expressional indexes can probably be supported in the future, but we disallow
+ them initially for conceptual simplicity.
+ 
+ Having multiple minmax indexes in the same table is acceptable, though most of
+ the time it would make more sense to have a single index covering all the
+ interesting columns.  Multiple indexes might be useful for columns added later.
+ 
+ Access Method Design
+ --------------------
+ 
+ Since item pointers are not stored inside indexes of this type, it is not
+ possible to support the amgettuple interface.  Instead, we only provide
+ amgetbitmap support; scanning a relation using this index requires a recheck
+ node on top.  The amgetbitmap routine returns a TIDBitmap comprising all the
+ pages in those page ranges that match the query qualifications; the recheck
+ node discards tuples that are not visible according to the snapshot, as well
+ as those that do not match the query quals.
+ 
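+ For illustration, a query against such an index would produce a plan of
+ roughly this shape (illustrative EXPLAIN output, reusing the index from the
+ example above):
+ 
+   Bitmap Heap Scan on foo
+     Recheck Cond: (a < 1000)
+     ->  Bitmap Index Scan on foo_minmax_idx
+           Index Cond: (a < 1000)
+ 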
+ For each supported datatype, we need an opclass with the following catalog
+ entries:
+ 
+ - support operators (pg_amop): same as btree (<, <=, =, >=, >)
+ 
+ These operators are used pervasively:
+ 
+ - The optimizer requires them to evaluate queries, so that the index is chosen
+   when queries on the indexed table are planned.
+ - During index construction (ambuild), they are used to determine the boundary
+   values for each page range.
+ - During index updates (aminsert), they are used to determine whether the new
+   heap tuple matches the existing index tuple; and if not, they are used to
+   construct the new index tuple.
+ 
+ In each index tuple (corresponding to one page range), we store:
+ - for each indexed column:
+   * minimum value across all tuples in the range
+   * maximum value across all tuples in the range
+   * are there nulls present in any tuple?
+   * are all the values null in all tuples in the range?
+ 
+ These null bits are stored in a single null bitmask of length 2x the number
+ of columns.
+ 
+ With the default INDEX_MAX_KEYS of 32, and considering columns of 8-byte length
+ types such as timestamptz or bigint, each tuple would be 522 bytes in length,
+ which seems reasonable.  There are 6 extra bytes for padding between the null
+ bitmask and the first data item, assuming 64-bit alignment; so the total size
+ for such an index tuple would actually be 528 bytes.
+ 
+ This maximum index tuple size is calculated as: mt_info (2 bytes) + null bitmap
+ (8 bytes) + data values (8 bytes * 32 columns * 2 values) = 2 + 8 + 512 = 522
+ bytes.
+ 
+ (Of course, larger columns are possible, such as varchar, but creating minmax
+ indexes on such columns seems of little practical usefulness.  Also, the
+ usefulness of an index containing so many columns is dubious, at best.)
+ 
+ There can be gaps where some pages have no covering index entry. In particular,
+ the last few pages of the table would commonly not be summarized.
+ 
+ The Range Reverse Map
+ ---------------------
+ 
+ To find out the index tuple for a particular page range, we have a
+ separate fork called the range reverse map.  This fork stores one TID per
+ range, which is the address of the index tuple summarizing that range.  Since
+ these map entries are fixed size, it is possible to compute the address of the
+ range map entry for any given heap page.
+ 
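+ As a sketch of that computation (using REGULAR_REVMAP_PAGE_MAXITEMS, the
+ number of entries that fit on one revmap page, and glossing over the metapage
+ and the revmap array pages that locate the revmap pages themselves):
+ 
+   rangeNo    = heapBlk / pagesPerRange;
+   revmapPage = rangeNo / REGULAR_REVMAP_PAGE_MAXITEMS;
+   revmapIdx  = rangeNo % REGULAR_REVMAP_PAGE_MAXITEMS;
+ 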
+ When a new heap tuple is inserted in a summarized page range, it is possible to
+ compare the existing index tuple with the new heap tuple.  If the heap tuple is
+ outside the minimum/maximum boundaries given by the index tuple for any indexed
+ column (or if the new heap tuple contains null values but the index tuple
+ indicates there are no nulls), it is necessary to create a new index tuple with
+ the new values.  To do this, a new index tuple is inserted, and the reverse range
+ map is updated to point to it.  The old index tuple is left in place, for later
+ garbage collection.
+ 
+ If the reverse range map points to an invalid TID, the corresponding page range
+ is not summarized.
+ 
+ A minmax index is updated by creating a new summary tuple whenever an
+ insertion outside the min-max interval occurs in the pages within the range.
+ 
+ To scan a table following a minmax index, we scan the reverse range map
+ sequentially.  This yields index tuples in ascending page range order.
+ Query quals are matched to each index tuple; if they match, each page within
+ the page range is returned as part of the output TID bitmap.  If there's no
+ match, the range is skipped.  Reverse range map entries holding invalid index
+ TIDs, that is, unsummarized page ranges, also have their pages included in the
+ TID bitmap.
+ 
+ To store the range reverse map, we reuse the VISIBILITYMAP_FORKNUM, since a VM
+ does not make sense for a minmax index anyway (XXX -- really??)
+ 
+ When tuples are added to unsummarized pages, nothing needs to happen.
+ 
+ Heap tuples can be removed from anywhere without restriction.
+ 
+ Index entries that are not referenced from the revmap can be removed from the
+ main fork.  This currently happens at amvacuumcleanup, though it could be
+ carried out separately; no heap scan is necessary to determine which tuples
+ are unreachable.
+ 
+ Summarization
+ -------------
+ 
+ At index creation time, the whole table is scanned; for each page range the
+ minimum and maximum values of each indexed column and nulls bitmap are
+ collected and stored in the index.  The possibly-incomplete range at the end
+ of the table is not included.
+ 
+ Once in a while, it is necessary to summarize a bunch of unsummarized pages
+ (because the table has grown since the index was created), or re-summarize a
+ range that has been marked invalid.  This is simple: scan the page range
+ calculating the min() and max() for each indexed column, then insert the new
+ index entry at the end of the index.  The main interesting questions are:
+ 
+ a) when to do it
+    The perfect time to do it is as soon as a complete page range of the
+    configured range size (the pages_per_range reloption; 128 pages by
+    default) has been filled.
+ 
+ b) who does it (what process)
+    It doesn't seem a good idea to have a client-connected process do it;
+    it would incur unwanted latency.  Three other options are (i) to spawn a
+    specialized process to do it, which perhaps can be signalled by a
+    client-connected process that executes a scan and notices the need to run
+    summarization; or (ii) to let autovacuum do it, as a separate new
+    maintenance task.  This seems simple enough to bolt on top of already
+    existing autovacuum infrastructure.  The timing constraints of autovacuum
+    might be undesirable, though.  (iii) wait for user command.
+ 
+ The easiest way around this seems to be to have vacuum do it.  That way we can
+ simply do re-summarization in the amvacuumcleanup routine.  The other options
+ would require a separate AM routine, which appears unwarranted at this stage.
+ 
+ Vacuuming
+ ---------
+ 
+ Vacuuming a table that has a minmax index does not represent a significant
+ challenge.  Since no heap TIDs are stored, it's not necessary to scan the index
+ when heap tuples are removed.  It might be that some min() value can be
+ incremented, or some max() value can be decremented; but this would represent
+ an optimization opportunity only, not a correctness issue.  Perhaps it's
+ simpler to represent this as the need to re-run summarization on the affected
+ page range.
+ 
+ Note that if there are no indexes on the table other than the minmax index,
+ usage of maintenance_work_mem by vacuum can be decreased significantly, because
+ no detailed index scan needs to take place (and thus it's not necessary for
+ vacuum to save TIDs to remove).  This optimization opportunity is best left for
+ future improvement.
+ 
+ Locking considerations
+ ----------------------
+ 
+ To read the TID during an index scan, we follow this protocol:
+ 
+ * read revmap page
+ * obtain share lock on the revmap buffer
+ * read the TID
+ * obtain share lock on buffer of main fork
+ * LockTuple the TID (using the index as relation).  A shared lock is
+   sufficient.  We need the LockTuple to prevent VACUUM from recycling
+   the index tuple; see below.
+ * release revmap buffer lock
+ * read the index tuple
+ * release the tuple lock
+ * release main fork buffer lock
+ 
+ 
+ To update the summary tuple for a page range, we use this protocol:
+ 
+ * insert a new index tuple somewhere in the main fork; note its TID
+ * read revmap page
+ * obtain exclusive lock on revmap buffer
+ * write the TID
+ * release lock
+ 
+ This ensures no concurrent reader can obtain a partially-written TID.
+ Note we don't need a tuple lock here.  Concurrent scans don't have to
+ worry about whether they got the old or new index tuple: if they get the
+ old one, the tighter values are okay from a correctness standpoint because
+ due to MVCC they can't possibly see the just-inserted heap tuples anyway.
+ 
+ 
+ For vacuuming, we need to figure out which index tuples are no longer
+ referenced from the reverse range map.  This requires some brute force,
+ but is simple:
+ 
+ 1) scan the complete index, store each existing TID in a dynahash (a sketch
+    of the entry appears below).  Hash key is the TID, hash value is a boolean
+    initially set to false.
+ 2) scan the complete revmap sequentially, read the TIDs on each page.  Share
+    lock on each page is sufficient.  For each TID so obtained, grab the
+    element from the hash and update the boolean to true.
+ 3) Scan the index again; for each tuple found, search the hash table.
+    If the tuple is not present in hash, it must have been added after our
+    initial scan; ignore it.  If tuple is present in hash, and the hash flag is
+    true, then the tuple is referenced from the revmap; ignore it.  If the hash
+    flag is false, then the index tuple is no longer referenced by the revmap;
+    but it could be about to be accessed by a concurrent scan.  Do
+    ConditionalLockTuple.  If this fails, ignore the tuple (it's in use), it
+    will be deleted by a future vacuum.  If lock is acquired, then we can safely
+    remove the index tuple.
+ 4) Index pages with free space can be detected by this second scan.  Register
+    those with the FSM.
+ 
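+ For illustration, the dynahash entry used above could look like this (a
+ sketch only; this struct is not part of the patch):
+ 
+   typedef struct RevmapRefEntry
+   {
+       ItemPointerData tid;        /* hash key: TID of an index tuple */
+       bool            referenced; /* set to true during step 2 */
+   } RevmapRefEntry;
+ 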
+ Note this doesn't require scanning the heap at all, or being involved in
+ the heap's cleanup procedure.  Also, there is no need to LockBufferForCleanup,
+ which is a nice property because index scans keep pages pinned for long
+ periods.
+ 
+ 
+ 
+ Optimizer
+ ---------
+ 
+ In order to make this all work, the only things we need are a good enough
+ opclass and an amcostestimate routine.  With those in place, the optimizer is
+ able to pick the index on its own.
+ 
+ 
+ Open questions
+ --------------
+ 
+ * Same-size page ranges?
+   Current related literature seems to consider that each "index entry" in a
+   minmax index must cover the same number of pages.  There doesn't seem to be a
+   hard reason for this to be so; it might make sense to allow the index to
+   self-tune so that some index entries cover smaller page ranges, if this allows
+   the min()/max() values to be more compact.  This would incur larger minmax
+   overhead for the index itself, but might allow better pruning of page ranges
+   during scan.  In the limit of one index tuple per page, the index itself would
+   occupy too much space, even though we would be able to skip reading most
+   heap pages, because the min()/max() ranges are tight; in the opposite limit of
+   a single tuple that summarizes the whole table, we wouldn't be able to prune
+   anything even though the index is very small.  This can probably be made to work
+   by using the reverse range map as an index in itself.
+ 
+ * More compact representation for TIDBitmap?
+   TIDBitmap is the structure used to represent bitmap scans.  The
+   representation of lossy page ranges is not optimal for our purposes, because
+   it uses a Bitmapset to represent pages in the range; since we're going to return
+   all pages in a large range, it might be more convenient to allow for a
+   struct that uses start and end page numbers to represent the range, instead.
+ 
+ 
+ 
+ References:
+ 
+ Email thread on pgsql-hackers
+   http://www.postgresql.org/message-id/1199296574.7260.149.camel@ebony.site
+   From: Simon Riggs
+   To: pgsql-hackers
+   Subject: Dynamic Partitioning using Segment Visibility Map
+ 
+ http://wiki.postgresql.org/wiki/Segment_Exclusion
+ http://wiki.postgresql.org/wiki/Segment_Visibility_Map
+ 
*** a/src/backend/access/Makefile
--- b/src/backend/access/Makefile
***************
*** 8,13 **** subdir = src/backend/access
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
! SUBDIRS	    = common gin gist hash heap index nbtree rmgrdesc spgist transam
  
  include $(top_srcdir)/src/backend/common.mk
--- 8,13 ----
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
! SUBDIRS	    = common gin gist hash heap index minmax nbtree rmgrdesc spgist transam
  
  include $(top_srcdir)/src/backend/common.mk
*** a/src/backend/access/common/reloptions.c
--- b/src/backend/access/common/reloptions.c
***************
*** 209,214 **** static relopt_int intRelOpts[] =
--- 209,221 ----
  			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST
  		}, -1, 0, 2000000000
  	},
+ 	{
+ 		{
+ 			"pages_per_range",
+ 			"Number of pages that each page range covers in a Minmax index",
+ 			RELOPT_KIND_MINMAX
+ 		}, 128, 1, 131072
+ 	},
  
  	/* list terminator */
  	{{NULL}}
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 271,276 **** initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
--- 271,278 ----
  		scan->rs_startblock = 0;
  	}
  
+ 	scan->rs_initblock = 0;
+ 	scan->rs_numblocks = InvalidBlockNumber;
  	scan->rs_inited = false;
  	scan->rs_ctup.t_data = NULL;
  	ItemPointerSetInvalid(&scan->rs_ctup.t_self);
***************
*** 296,301 **** initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
--- 298,311 ----
  		pgstat_count_heap_scan(scan->rs_rd);
  }
  
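+ /*
+  * heap_setscanlimits - restrict range of a heapscan
+  *
+  * startBlk is the page to start at; numBlks is the number of pages to scan
+  * from there.  This is intended to be called before the scan starts
+  * returning tuples.  An illustrative (hypothetical) caller, scanning a
+  * single range of pages:
+  *
+  *		scan = heap_beginscan(rel, snapshot, 0, NULL);
+  *		heap_setscanlimits(scan, rangeStart, pagesPerRange);
+  *		while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+  *			...
+  */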
+ void
+ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
+ {
+ 	scan->rs_startblock = startBlk;
+ 	scan->rs_initblock = startBlk;
+ 	scan->rs_numblocks = numBlks;
+ }
+ 
  /*
   * heapgetpage - subroutine for heapgettup()
   *
***************
*** 636,642 **** heapgettup(HeapScanDesc scan,
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
--- 646,653 ----
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
***************
*** 646,652 **** heapgettup(HeapScanDesc scan,
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
--- 657,664 ----
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
***************
*** 897,903 **** heapgettup_pagemode(HeapScanDesc scan,
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
--- 909,916 ----
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
***************
*** 907,913 **** heapgettup_pagemode(HeapScanDesc scan,
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
--- 920,927 ----
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
*** /dev/null
--- b/src/backend/access/minmax/Makefile
***************
*** 0 ****
--- 1,17 ----
+ #-------------------------------------------------------------------------
+ #
+ # Makefile--
+ #    Makefile for access/minmax
+ #
+ # IDENTIFICATION
+ #    src/backend/access/minmax/Makefile
+ #
+ #-------------------------------------------------------------------------
+ 
+ subdir = src/backend/access/minmax
+ top_builddir = ../../../..
+ include $(top_builddir)/src/Makefile.global
+ 
+ OBJS = minmax.o mmrevmap.o mmtuple.o mmxlog.o
+ 
+ include $(top_srcdir)/src/backend/common.mk
*** /dev/null
--- b/src/backend/access/minmax/minmax.c
***************
*** 0 ****
--- 1,1680 ----
+ /*
+  * minmax.c
+  *		Implementation of Minmax indexes for Postgres
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/minmax.c
+  *
+  * TODO
+  * 		* support collatable datatypes
+  *		* ScalarArrayOpExpr
+  * 		* Make use of the stored NULL bits
+  * 		* fill in the XLog routines
+  * 		* we can support unlogged indexes now
+  */
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
+ #include "access/minmax_xlog.h"
+ #include "access/reloptions.h"
+ #include "access/relscan.h"
+ #include "access/xlogutils.h"
+ #include "catalog/index.h"
+ #include "catalog/pg_operator.h"
+ #include "commands/vacuum.h"
+ #include "miscadmin.h"
+ #include "pgstat.h"
+ #include "storage/bufmgr.h"
+ #include "storage/freespace.h"
+ #include "storage/lmgr.h"
+ #include "utils/datum.h"
+ #include "utils/lsyscache.h"
+ #include "utils/memutils.h"
+ #include "utils/syscache.h"
+ 
+ 
+ /*
+  * We use a MMBuildState during initial construction of a Minmax index.
+  * Within that struct, each column's contruction info is represented by a
+  * MMPerColBuildInfo struct.  The running state is all kept in a
+  * DeformedMMTuple.
+  */
+ typedef struct MMPerColBuildInfo
+ {
+ 	int			typLen;
+ 	bool		typByVal;
+ 	FmgrInfo	lt;
+ 	FmgrInfo	gt;
+ } MMPerColBuildInfo;
+ 
+ typedef struct MMBuildState
+ {
+ 	Relation	irel;
+ 	int			numtuples;
+ 	Buffer		currentInsertBuf;
+ 	BlockNumber currRangeStart;
+ 	BlockNumber nextRangeAt;
+ 	mmRevmapAccess *rmAccess;
+ 	TupleDesc	indexDesc;
+ 	TupleDesc	diskDesc;
+ 	DeformedMMTuple *dtuple;
+ 	MMPerColBuildInfo perColState[FLEXIBLE_ARRAY_MEMBER];
+ } MMBuildState;
+ 
+ static void mmbuildCallback(Relation index,
+ 				HeapTuple htup, Datum *values, bool *isnull,
+ 				bool tupleIsAlive, void *state);
+ static void get_mm_operator(Oid opfam, Oid idxtypid, Oid keytypid,
+ 				StrategyNumber strategy, FmgrInfo *finfo);
+ static inline bool invoke_mm_operator(FmgrInfo *operator, Oid collation,
+ 				   Datum left, Datum right);
+ static void mm_doinsert(Relation idxrel, mmRevmapAccess *rmAccess,
+ 			Buffer *buffer, BlockNumber heapblkno, MMTuple *tup, Size itemsz);
+ static bool mm_getinsertbuffer(Relation irel, Buffer *buffer, Size itemsz);
+ 
+ 
+ 
+ 
+ /*
+  * A tuple in the heap is being inserted.  To keep a minmax index up to date,
+  * we need to obtain the relevant index tuple, compare its min()/max() stored
+  * values with those of the new tuple; if the tuple values are in range,
+  * there's nothing to do; otherwise we need to update the index (either by
+  * a new index tuple and repointing the revmap, or by overwriting the existing
+  * index tuple).
+  *
+  * If the range is not currently summarized (i.e. the revmap returns InvalidTid
+  * for it), there's nothing to do either.
+  */
+ Datum
+ mminsert(PG_FUNCTION_ARGS)
+ {
+ 	Relation	idxRel = (Relation) PG_GETARG_POINTER(0);
+ 	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
+ 	bool	   *nulls = (bool *) PG_GETARG_POINTER(2);
+ 	ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3);
+ 
+ 	/* we ignore the rest of our arguments */
+ 	mmRevmapAccess *rmAccess;
+ 	Datum		indclassDatum;
+ 	bool		isnull;
+ 	oidvector  *indclass;
+ 	TupleDesc	tupdesc;
+ 	ItemId		origlp;
+ 	MMTuple    *mmtup;
+ 	DeformedMMTuple *dtup;
+ 	ItemPointerData idxtid;
+ 	BlockNumber heapBlk;
+ 	BlockNumber iblk;
+ 	OffsetNumber ioff;
+ 	Buffer		buf;
+ 	IndexInfo  *indexInfo;
+ 	Page		page;
+ 	int			keyno;
+ 	FmgrInfo   *lt;
+ 	FmgrInfo   *gt;
+ 	bool		need_insert = false;
+ 
+ 	rmAccess = mmRevmapAccessInit(idxRel);
+ 
+ 	heapBlk = ItemPointerGetBlockNumber(heaptid);
+ 	mmGetHeapBlockItemptr(rmAccess, heapBlk, &idxtid);
+ 	/* tuple lock on idxtid is grabbed by mmGetHeapBlockItemptr */
+ 
+ 	if (!ItemPointerIsValid(&idxtid))
+ 	{
+ 		/* nothing to do, range is unsummarized */
+ 		mmRevmapAccessTerminate(rmAccess);
+ 		return BoolGetDatum(false);
+ 	}
+ 
+ 	tupdesc = RelationGetDescr(idxRel);
+ 	indexInfo = BuildIndexInfo(idxRel);
+ 
+ 	lt = palloc(sizeof(FmgrInfo) * indexInfo->ii_NumIndexAttrs);
+ 	gt = palloc(sizeof(FmgrInfo) * indexInfo->ii_NumIndexAttrs);
+ 
+ 	/* grab the operators we will need: < and > for each indexed column */
+ 	indclassDatum = SysCacheGetAttr(INDEXRELID, idxRel->rd_indextuple,
+ 									Anum_pg_index_indclass, &isnull);
+ 	Assert(!isnull);
+ 	indclass = (oidvector *) DatumGetPointer(indclassDatum);
+ 	for (keyno = 0; keyno < indexInfo->ii_NumIndexAttrs; keyno++)
+ 	{
+ 		Oid			opfam = get_opclass_family(indclass->values[keyno]);
+ 		Oid			idxtypid = tupdesc->attrs[keyno]->atttypid;
+ 
+ 		get_mm_operator(opfam, idxtypid, idxtypid, BTLessStrategyNumber,
+ 						&lt[keyno]);
+ 		get_mm_operator(opfam, idxtypid, idxtypid, BTGreaterStrategyNumber,
+ 						&gt[keyno]);
+ 	}
+ 
+ 	iblk = ItemPointerGetBlockNumber(&idxtid);
+ 	ioff = ItemPointerGetOffsetNumber(&idxtid);
+ 	Assert(iblk != InvalidBlockNumber);
+ 	buf = ReadBuffer(idxRel, iblk);
+ 
+ 	LockBuffer(buf, BUFFER_LOCK_SHARE);
+ 	UnlockTuple(idxRel, &idxtid, ShareLock);
+ 	page = BufferGetPage(buf);
+ 	origlp = PageGetItemId(page, ioff);
+ 	mmtup = (MMTuple *) PageGetItem(page, origlp);
+ 
+ 	dtup = minmax_deform_tuple(tupdesc, mmtup);
+ 
+ 	/*
+ 	 * Compare the key values of the new tuple to the stored index values.
+ 	 * Note that we need to keep checking each column even after noticing that a
+ 	 * new tuple is necessary, because as a side effect this loop will update
+ 	 * the dtup with the values to insert in the new tuple.
+ 	 */
+ 	for (keyno = 0; keyno < indexInfo->ii_NumIndexAttrs; keyno++)
+ 	{
+ 		/*
+ 		 * If the new tuple contains a null in this attr, but the range index
+ 		 * tuple doesn't allow for nulls, we need a new summary tuple.
+ 		 */
+ 		if (nulls[keyno])
+ 		{
+ 			if (!dtup->values[keyno].hasnulls)
+ 			{
+ 				need_insert = true;
+ 				dtup->values[keyno].hasnulls = true;
+ 			}
+ 
+ 			/* a null value cannot affect the min/max interval */
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * If the new key value is not within the min/max interval for this
+ 		 * range, we need a new summary tuple.
+ 		 */
+ 		if (invoke_mm_operator(&lt[keyno], InvalidOid, values[keyno],
+ 							   dtup->values[keyno].min))
+ 		{
+ 			dtup->values[keyno].min = values[keyno];	/* XXX datumCopy? */
+ 			need_insert = true;
+ 		}
+ 		if (invoke_mm_operator(&gt[keyno], InvalidOid, values[keyno],
+ 							   dtup->values[keyno].max))
+ 		{
+ 			dtup->values[keyno].max = values[keyno];	/* XXX datumCopy? */
+ 			need_insert = true;
+ 		}
+ 	}
+ 
+ 	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 	if (need_insert)
+ 	{
+ 		TupleDesc	diskDesc;
+ 		Size		tupsz;
+ 		MMTuple    *tup;
+ 
+ 		diskDesc = minmax_get_descr(tupdesc);
+ 		tup = minmax_form_tuple(tupdesc, diskDesc, dtup, &tupsz);
+ 
+ 		/*
+ 		 * If the size of the original tuple is greater or equal to the new
+ 		 * index tuple, we can overwrite.  This saves regular page bloat, and
+ 		 * also saves revmap traffic.  This might leave some unused space
+ 		 * before the start of the next tuple, but we don't worry about that
+ 		 * here.
+ 		 *
+ 		 * We avoid doing this when the itempointer of the index tuple would
+ 		 * change, because that would require an update to the revmap while
+ 		 * holding exclusive lock on this page, which would reduce concurrency.
+ 		 *
+ 		 * Note that we continue to access 'origlp' here, even though there
+ 		 * was an interval during which the page wasn't locked.  Since we hold
+ 		 * pin on the page, this is okay -- the buffer cannot go away from
+ 		 * under us, and also tuples cannot be shuffled around.
+ 		 */
+ 		if (tupsz <= ItemIdGetLength(origlp))
+ 		{
+ 			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 			START_CRIT_SECTION();
+ 			PageOverwriteItemData(BufferGetPage(buf),
+ 								  ioff,
+ 								  (Item) tup, tupsz);
+ 			MarkBufferDirty(buf);
+ 
+ 			/* XLOG stuff */
+ 			if (RelationNeedsWAL(idxRel))
+ 			{
+ 				xl_minmax_insert	xlrec;
+ 				XLogRecPtr	recptr;
+ 				XLogRecData	rdata[2];
+ 				uint8		info = XLOG_MINMAX_INSERT;
+ 
+ 				xlrec.target.node = idxRel->rd_node;
+ 				xlrec.target.tid = idxtid;
+ 				xlrec.overwrite = true;
+ 				rdata[0].data = (char *) &xlrec;
+ 				rdata[0].len = SizeOfMinmaxInsert;
+ 				rdata[0].buffer = InvalidBuffer;
+ 				rdata[0].next = &(rdata[1]);
+ 
+ 				rdata[1].data = (char *) tup;
+ 				rdata[1].len = tupsz;
+ 				rdata[1].buffer = buf;
+ 				rdata[1].buffer_std = true;
+ 				rdata[1].next = NULL;
+ 
+ 				recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 				PageSetLSN(page, recptr);
+ 			}
+ 
+ 			END_CRIT_SECTION();
+ 
+ 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 		}
+ 		else
+ 		{
+ 			/*
+ 			 * The new tuple is larger than the original one, so we must insert
+ 			 * a new one the slow way.
+ 			 */
+ 			mm_doinsert(idxRel, rmAccess, &buf, heapBlk, tup, tupsz);
+ 
+ #ifdef NOT_YET
+ 			/*
+ 			 * Possible optimization: if we can grab an exclusive lock on the
+ 			 * buffer containing the old tuple right away, we can also seize
+ 			 * the opportunity to prune the old tuple and avoid some bloat.
+ 			 * This is not necessary for correctness.
+ 			 */
+ 			if (ConditionalLockBuffer(buf))
+ 			{
+ 				/* prune the old tuple */
+ 
+ 				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 			}
+ #endif
+ 		}
+ 	}
+ 
+ 	ReleaseBuffer(buf);
+ 
+ 	mmRevmapAccessTerminate(rmAccess);
+ 
+ 	return BoolGetDatum(false);
+ }
+ 
+ Datum
+ mmbeginscan(PG_FUNCTION_ARGS)
+ {
+ 	Relation	r = (Relation) PG_GETARG_POINTER(0);
+ 	int			nkeys = PG_GETARG_INT32(1);
+ 	int			norderbys = PG_GETARG_INT32(2);
+ 	IndexScanDesc scan;
+ 
+ 	scan = RelationGetIndexScan(r, nkeys, norderbys);
+ 
+ 	PG_RETURN_POINTER(scan);
+ }
+ 
+ 
+ /*
+  * Execute the index scan.
+  *
+  * This works by reading index TIDs from the revmap, and obtaining the index
+  * tuples pointed to by them; the min/max values in them are compared to the
+  * scan keys.  We return into the TID bitmap all the pages in ranges
+  * corresponding to index tuples that match the scan keys.
+  *
+  * If a TID from the revmap is read as InvalidTID, we know that range is
+  * unsummarized.  Pages in those ranges need to be returned regardless of scan
+  * keys.
+  */
+ Datum
+ mmgetbitmap(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 	TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
+ 	Relation	idxRel = scan->indexRelation;
+ 	Buffer		currIdxBuf = InvalidBuffer;
+ 	Oid			heapOid;
+ 	Relation	heapRel;
+ 	mmRevmapAccess *rmAccess;
+ 	BlockNumber nblocks;
+ 	BlockNumber heapBlk;
+ 	BlockNumber	pagesPerRange;
+ 	TupleDesc	tupdesc;
+ 	AttrNumber	keyno;
+ 	Datum		indclassDatum;
+ 	bool		isnull;
+ 	oidvector  *indclass;
+ 	FmgrInfo   *lt;
+ 	FmgrInfo   *lteq;
+ 	FmgrInfo   *gteq;
+ 	FmgrInfo   *gt;
+ 
+ 	pgstat_count_index_scan(idxRel);
+ 
+ 	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
+ 	heapRel = heap_open(heapOid, AccessShareLock);
+ 	nblocks = RelationGetNumberOfBlocks(heapRel);
+ 	heap_close(heapRel, AccessShareLock);
+ 
+ 	tupdesc = RelationGetDescr(idxRel);
+ 
+ 	lt = palloc(sizeof(FmgrInfo) * scan->numberOfKeys);
+ 	lteq = palloc(sizeof(FmgrInfo) * scan->numberOfKeys);
+ 	gteq = palloc(sizeof(FmgrInfo) * scan->numberOfKeys);
+ 	gt = palloc(sizeof(FmgrInfo) * scan->numberOfKeys);
+ 
+ 	/*
+ 	 * lookup the operators needed to determine range containment of each key
+ 	 * value.
+ 	 */
+ 	indclassDatum = SysCacheGetAttr(INDEXRELID, idxRel->rd_indextuple,
+ 									Anum_pg_index_indclass, &isnull);
+ 	Assert(!isnull);
+ 	indclass = (oidvector *) DatumGetPointer(indclassDatum);
+ 	for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
+ 	{
+ 		AttrNumber	keyattno;
+ 		Oid			opfam;
+ 		Oid			keytypid;
+ 		Oid			idxtypid;
+ 
+ 		keyattno = scan->keyData[keyno].sk_attno;
+ 		opfam = get_opclass_family(indclass->values[keyattno - 1]);
+ 		keytypid = scan->keyData[keyno].sk_subtype;
+ 		idxtypid = tupdesc->attrs[keyattno - 1]->atttypid;
+ 
+ 		get_mm_operator(opfam, idxtypid, keytypid, BTLessStrategyNumber,
+ 						&lt[keyno]);
+ 		get_mm_operator(opfam, idxtypid, keytypid, BTLessEqualStrategyNumber,
+ 						&lteq[keyno]);
+ 		get_mm_operator(opfam, idxtypid, keytypid, BTGreaterStrategyNumber,
+ 						&gt[keyno]);
+ 		get_mm_operator(opfam, idxtypid, keytypid, BTGreaterEqualStrategyNumber,
+ 						&gteq[keyno]);
+ 	}
+ 
+ 	/*
+ 	 * Now scan the revmap.  We start by querying for heap page 0,
+ 	 * incrementing by the number of pages per range; this gives us a full
+ 	 * view of the table.
+ 	 */
+ 	pagesPerRange = MinmaxGetPagesPerRange(idxRel);
+ 	rmAccess = mmRevmapAccessInit(idxRel);
+ 	for (heapBlk = 0; heapBlk < nblocks; heapBlk += pagesPerRange)
+ 	{
+ 		ItemPointerData itupptr;
+ 		bool		addrange;
+ 
+ 		mmGetHeapBlockItemptr(rmAccess, heapBlk, &itupptr);
+ 
+ 		/*
+ 		 * For revmap items that return InvalidTID, we must return the whole
+ 		 * range; otherwise, fetch the index item and compare it to the scan
+ 		 * keys.
+ 		 */
+ 		if (!ItemPointerIsValid(&itupptr))
+ 		{
+ 			addrange = true;
+ 		}
+ 		else
+ 		{
+ 			Page		page;
+ 			OffsetNumber idxoffno;
+ 			BlockNumber idxblkno;
+ 			MMTuple    *tup;
+ 			DeformedMMTuple *dtup;
+ 			int			keyno;
+ 
+ 			idxoffno = ItemPointerGetOffsetNumber(&itupptr);
+ 			idxblkno = ItemPointerGetBlockNumber(&itupptr);
+ 
+ 			if (currIdxBuf == InvalidBuffer ||
+ 				idxblkno != BufferGetBlockNumber(currIdxBuf))
+ 			{
+ 				if (currIdxBuf != InvalidBuffer)
+ 					UnlockReleaseBuffer(currIdxBuf);
+ 
+ 				Assert(idxblkno != InvalidBlockNumber);
+ 				currIdxBuf = ReadBuffer(idxRel, idxblkno);
+ 				LockBuffer(currIdxBuf, BUFFER_LOCK_SHARE);
+ 			}
+ 
+ 			UnlockTuple(idxRel, &itupptr, ShareLock);
+ 
+ 			page = BufferGetPage(currIdxBuf);
+ 			tup = (MMTuple *)
+ 				PageGetItem(page, PageGetItemId(page, idxoffno));
+ 			/* XXX probably need copies */
+ 			dtup = minmax_deform_tuple(tupdesc, tup);
+ 
+ 			/*
+ 			 * Compare scan keys with min/max values stored in range.  If scan
+ 			 * keys are matched, the page range must be added to the bitmap.
+ 			 */
+ 			for (keyno = 0, addrange = true;
+ 				 keyno < scan->numberOfKeys;
+ 				 keyno++)
+ 			{
+ 				ScanKey		key = &scan->keyData[keyno];
+ 				AttrNumber	keyattno = key->sk_attno;
+ 
+ 				/*
+ 				 * The analysis we need to make to decide whether to include a
+ 				 * page range in the output result is: is it possible for a
+ 				 * tuple contained within the min/max interval specified by
+ 				 * this index tuple to match what's specified by the scan key?
+ 				 * For example, for a query qual such as "WHERE col < 10" we
+ 				 * need to include a range whose minimum value is less than
+ 				 * 10.
+ 				 *
+ 				 * When there are multiple scan keys, failure to meet the
+ 				 * criteria for a single one of them is enough to discard the
+ 				 * range as a whole.
+ 				 */
+ 				switch (key->sk_strategy)
+ 				{
+ 					case BTLessStrategyNumber:
+ 						addrange =
+ 							invoke_mm_operator(&lt[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].min,
+ 											   key->sk_argument);
+ 						break;
+ 					case BTLessEqualStrategyNumber:
+ 						addrange =
+ 							invoke_mm_operator(&lteq[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].min,
+ 											   key->sk_argument);
+ 						break;
+ 					case BTEqualStrategyNumber:
+ 
+ 						/*
+ 						 * In the equality case (WHERE col = someval), we want
+ 						 * to return the current page range if the minimum
+ 						 * value in the range <= scan key, and the maximum
+ 						 * value >= scan key.
+ 						 */
+ 						addrange =
+ 							invoke_mm_operator(&lteq[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].min,
+ 											   key->sk_argument);
+ 						if (!addrange)
+ 							break;
+ 						/* max() >= scankey */
+ 						addrange =
+ 							invoke_mm_operator(&gteq[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].max,
+ 											   key->sk_argument);
+ 						break;
+ 					case BTGreaterEqualStrategyNumber:
+ 						addrange =
+ 							invoke_mm_operator(&gteq[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].max,
+ 											   key->sk_argument);
+ 						break;
+ 					case BTGreaterStrategyNumber:
+ 						addrange =
+ 							invoke_mm_operator(&gt[keyno], InvalidOid,
+ 											   dtup->values[keyattno - 1].max,
+ 											   key->sk_argument);
+ 						break;
+ 					default:
+ 						/* can't happen */
+ 						elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+ 						addrange = false;
+ 						break;
+ 				}
+ 
+ 				/*
+ 				 * If the current scan key doesn't match the range values,
+ 				 * don't look at further ones.
+ 				 */
+ 				if (!addrange)
+ 					break;
+ 			}
+ 
+ 			/* XXX anything to free here? */
+ 			pfree(dtup);
+ 		}
+ 
+ 		if (addrange)
+ 		{
+ 			BlockNumber pageno;
+ 
+ 			for (pageno = heapBlk;
+ 				 pageno <= heapBlk + pagesPerRange - 1;
+ 				 pageno++)
+ 				tbm_add_page(tbm, pageno);
+ 		}
+ 	}
+ 
+ 	mmRevmapAccessTerminate(rmAccess);
+ 	if (currIdxBuf != InvalidBuffer)
+ 		UnlockReleaseBuffer(currIdxBuf);
+ 
+ 	pfree(lt);
+ 	pfree(lteq);
+ 	pfree(gt);
+ 	pfree(gteq);
+ 
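+ 	/*
+ 	 * amgetbitmap's result is the number of tuples added to the bitmap,
+ 	 * which callers only use as an estimate; we have no exact count here,
+ 	 * so return a constant as a crude approximation.
+ 	 */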
+ 	PG_RETURN_INT64(MaxHeapTuplesPerPage);
+ }
+ 
+ 
+ Datum
+ mmrescan(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
+ 
+ 	/* other arguments ignored */
+ 
+ 	if (scankey && scan->numberOfKeys > 0)
+ 	{
+ 		memmove(scan->keyData, scankey,
+ 				scan->numberOfKeys * sizeof(ScanKeyData));
+ 	}
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ Datum
+ mmendscan(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 
+ 	/* anything to do here? */
+ 	(void) scan;	/* silence compiler */
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ Datum
+ mmmarkpos(PG_FUNCTION_ARGS)
+ {
+ 	elog(ERROR, "MinMax does not support mark/restore");
+ 	PG_RETURN_VOID();
+ }
+ 
+ Datum
+ mmrestrpos(PG_FUNCTION_ARGS)
+ {
+ 	elog(ERROR, "MinMax does not support mark/restore");
+ 	PG_RETURN_VOID();
+ }
+ 
+ /*
+  * Reset the per-column build state in an MMBuildState.
+  */
+ static void
+ clear_mm_percol_buildstate(MMBuildState *mmstate)
+ {
+ 	int			i;
+ 
+ 	for (i = 0; i < mmstate->indexDesc->natts; i++)
+ 	{
+ 		mmstate->dtuple->values[i].allnulls = true;
+ 		mmstate->dtuple->values[i].hasnulls = false;
+ 		mmstate->dtuple->values[i].min = (Datum) 0;
+ 		mmstate->dtuple->values[i].max = (Datum) 0;
+ 	}
+ }
+ 
+ /*
+  * Per-heap-tuple callback for IndexBuildHeapScan.
+  *
+  * Note we don't worry about the page range at the end of the table here; it is
+  * present in the build state struct after we're called the last time, but not
+  * inserted into the index.  The caller is responsible for doing that, if
+  * appropriate.
+  */
+ static void
+ mmbuildCallback(Relation index,
+ 				HeapTuple htup,
+ 				Datum *values,
+ 				bool *isnull,
+ 				bool tupleIsAlive,
+ 				void *state)
+ {
+ 	MMBuildState *mmstate = (MMBuildState *) state;
+ 	BlockNumber thisblock;
+ 	int			i;
+ 
+ 	thisblock = ItemPointerGetBlockNumber(&htup->t_self);
+ 
+ 	/*
+ 	 * If we're in a new block which belongs to the next range, summarize what
+ 	 * we've got and start afresh.
+ 	 */
+ 	if (thisblock == mmstate->nextRangeAt)
+ 	{
+ 		MMTuple    *tup;
+ 		Size		size;
+ 
+ 		MINMAX_elog(DEBUG2, "mmbuildCallback: completed a range: %u--%u",
+ 					mmstate->currRangeStart,
+ 					mmstate->nextRangeAt);
+ #if 0
+ 		for (i = 0; i < mmstate->indexDesc->natts; i++)
+ 		{
+ 			elog(DEBUG2, "completed a range for column %d, range: %u .. %u",
+ 				 i,
+ 				 DatumGetUInt32(mmstate->dtuple->values[i].min),
+ 				 DatumGetUInt32(mmstate->dtuple->values[i].max));
+ 		}
+ #endif
+ 
+ 		/*
+ 		 * Create the index tuple containing min/max values, and insert it.
+ 		 */
+ 		tup = minmax_form_tuple(mmstate->indexDesc, mmstate->diskDesc,
+ 								mmstate->dtuple, &size);
+ 		mm_doinsert(mmstate->irel, mmstate->rmAccess,
+ 					&mmstate->currentInsertBuf, mmstate->currRangeStart, tup,
+ 					size);
+ 		mmstate->numtuples++;
+ 		pfree(tup);
+ 
+ 		/* and set state to correspond to the new current range */
+ 		mmstate->currRangeStart = mmstate->nextRangeAt;
+ 		mmstate->nextRangeAt = mmstate->currRangeStart + MinmaxGetPagesPerRange(index);
+ 
+ 		/* initialize aggregate state for the new range */
+ 		for (i = 0; i < mmstate->indexDesc->natts; i++)
+ 		{
+ 			if (!mmstate->dtuple->values[i].allnulls &&
+ 				!mmstate->perColState[i].typByVal)
+ 			{
+ 				pfree(DatumGetPointer(mmstate->dtuple->values[i].min));
+ 				pfree(DatumGetPointer(mmstate->dtuple->values[i].max));
+ 			}
+ 		}
+ 
+ 		clear_mm_percol_buildstate(mmstate);
+ 	}
+ 
+ 	/* Accumulate the current tuple into the running state */
+ 	for (i = 0; i < mmstate->indexDesc->natts; i++)
+ 	{
+ 		/*
+ 		 * If the value in the current heap tuple is null, there's not much to
+ 		 * do other than keep track that we saw it.
+ 		 */
+ 		if (isnull[i])
+ 		{
+ 			mmstate->dtuple->values[i].hasnulls = true;
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * If this is the first tuple in the range containing a not-null value
+ 		 * for this column, initialize our state.
+ 		 */
+ 		if (mmstate->dtuple->values[i].allnulls)
+ 		{
+ 			mmstate->dtuple->values[i].allnulls = false;
+ 			mmstate->dtuple->values[i].min =
+ 				datumCopy(values[i],
+ 						  mmstate->perColState[i].typByVal,
+ 						  mmstate->perColState[i].typLen);
+ 			mmstate->dtuple->values[i].max =
+ 				datumCopy(values[i],
+ 						  mmstate->perColState[i].typByVal,
+ 						  mmstate->perColState[i].typLen);
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * Otherwise, dtuple state was already initialized, and the current
+ 		 * tuple is not null: therefore we need to compare it to the current
+ 		 * state and possibly update the min/max boundaries.
+ 		 */
+ 		if (invoke_mm_operator(&mmstate->perColState[i].lt, InvalidOid,
+ 							   values[i],
+ 							   mmstate->dtuple->values[i].min))
+ 		{
+ 			if (!mmstate->perColState[i].typByVal)
+ 				pfree(DatumGetPointer(mmstate->dtuple->values[i].min));
+ 			mmstate->dtuple->values[i].min =
+ 				datumCopy(values[i],
+ 						  mmstate->perColState[i].typByVal,
+ 						  mmstate->perColState[i].typLen);
+ 		}
+ 
+ 		if (invoke_mm_operator(&mmstate->perColState[i].gt, InvalidOid,
+ 							   values[i],
+ 							   mmstate->dtuple->values[i].max))
+ 		{
+ 			if (!mmstate->perColState[i].typByVal)
+ 				pfree(DatumGetPointer(mmstate->dtuple->values[i].max));
+ 			mmstate->dtuple->values[i].max =
+ 				datumCopy(values[i],
+ 						  mmstate->perColState[i].typByVal,
+ 						  mmstate->perColState[i].typLen);
+ 		}
+ 	}
+ }
+ 
+ /*
+  * Initialize a MMBuildState appropriate to create tuples on the given index.
+  */
+ static MMBuildState *
+ initialize_mm_buildstate(Relation heapRel, Relation idxRel,
+ 						 mmRevmapAccess *rmAccess, IndexInfo *indexInfo)
+ {
+ 	MMBuildState *mmstate;
+ 	TupleDesc	heapDesc = RelationGetDescr(heapRel);
+ 	Datum		indclassDatum;
+ 	bool		isnull;
+ 	oidvector  *indclass;
+ 	int			i;
+ 
+ 	mmstate = palloc(offsetof(MMBuildState, perColState) +
+ 					 sizeof(MMPerColBuildInfo) * indexInfo->ii_NumIndexAttrs);
+ 
+ 	mmstate->irel = idxRel;
+ 	mmstate->numtuples = 0;
+ 	mmstate->currentInsertBuf = InvalidBuffer;
+ 	mmstate->currRangeStart = 0;
+ 	mmstate->nextRangeAt = MinmaxGetPagesPerRange(idxRel);
+ 	mmstate->rmAccess = rmAccess;
+ 	mmstate->indexDesc = RelationGetDescr(idxRel);
+ 	mmstate->diskDesc = minmax_get_descr(mmstate->indexDesc);
+ 
+ 	mmstate->dtuple = palloc(offsetof(DeformedMMTuple, values) +
+ 							 sizeof(MMValues) * indexInfo->ii_NumIndexAttrs);
+ 	/* other stuff in dtuple is initialized below */
+ 
+ 	indclassDatum = SysCacheGetAttr(INDEXRELID, idxRel->rd_indextuple,
+ 									Anum_pg_index_indclass, &isnull);
+ 	Assert(!isnull);
+ 	indclass = (oidvector *) DatumGetPointer(indclassDatum);
+ 
+ 	for (i = 0; i < mmstate->indexDesc->natts; i++)
+ 	{
+ 		int			heapAttno;
+ 		Form_pg_attribute attr;
+ 		Oid			opfam = get_opclass_family(indclass->values[i]);
+ 		Oid			idxtypid = mmstate->indexDesc->attrs[i]->atttypid;
+ 
+ 		heapAttno = indexInfo->ii_KeyAttrNumbers[i];
+ 		if (heapAttno == 0)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("cannot create minmax indexes on expressions")));
+ 
+ 		attr = heapDesc->attrs[heapAttno - 1];
+ 		mmstate->perColState[i].typByVal = attr->attbyval;
+ 		mmstate->perColState[i].typLen = attr->attlen;
+ 		get_mm_operator(opfam, idxtypid, idxtypid, BTLessStrategyNumber,
+ 						&(mmstate->perColState[i].lt));
+ 		get_mm_operator(opfam, idxtypid, idxtypid, BTGreaterStrategyNumber,
+ 						&(mmstate->perColState[i].gt));
+ 	}
+ 
+ 	clear_mm_percol_buildstate(mmstate);
+ 
+ 	return mmstate;
+ }
+ 
+ /*
+  * Initialize a page with the given type.
+  *
+  * Caller is responsible for marking it dirty, as appropriate.
+  */
+ void
+ mm_page_init(Page page, uint16 type)
+ {
+ 	MinmaxSpecialSpace *special;
+ 
+ 	PageInit(page, BLCKSZ, sizeof(MinmaxSpecialSpace));
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	special->type = type;
+ }
+ 
+ /*
+  * Initialize a new minmax index's metapage.
+  */
+ void
+ mm_metapage_init(Page page)
+ {
+ 	MinmaxMetaPageData	*metadata;
+ 	int			i;
+ 
+ 	mm_page_init(page, MINMAX_PAGETYPE_META);
+ 
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(page);
+ 
+ 	metadata->minmaxVersion = MINMAX_CURRENT_VERSION;
+ 	for (i = 0; i < MAX_REVMAP_ARRAYPAGES; i++)
+ 		metadata->revmapArrayPages[i] = InvalidBlockNumber;
+ }
+ 
+ /*
+  * mmbuild() -- build a new minmax index.
+  */
+ Datum
+ mmbuild(PG_FUNCTION_ARGS)
+ {
+ 	Relation	heap = (Relation) PG_GETARG_POINTER(0);
+ 	Relation	index = (Relation) PG_GETARG_POINTER(1);
+ 	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
+ 	IndexBuildResult *result;
+ 	double		reltuples;
+ 	mmRevmapAccess *rmAccess;
+ 	MMBuildState *mmstate;
+ 	Buffer		meta;
+ 
+ 	/*
+ 	 * We expect to be called exactly once for any index relation.
+ 	 */
+ 	if (RelationGetNumberOfBlocks(index) != 0)
+ 		elog(ERROR, "index \"%s\" already contains data",
+ 			 RelationGetRelationName(index));
+ 
+ 	/* partial indexes not supported */
+ 	if (indexInfo->ii_Predicate != NIL)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("partial indexes not supported")));
+ 	/* expressions not supported (yet?) */
+ 	if (indexInfo->ii_Expressions != NIL)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("expression indexes not supported")));
+ 
+ 	meta = mm_getnewbuffer(index);
+ 	START_CRIT_SECTION();
+ 	mm_metapage_init(BufferGetPage(meta));
+ 	MarkBufferDirty(meta);
+ 
+ 	if (RelationNeedsWAL(index))
+ 	{
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata;
+ 		Page		page;
+ 
+ 		rdata.buffer = InvalidBuffer;
+ 		rdata.data = (char *) &(index->rd_node);
+ 		rdata.len = sizeof(RelFileNode);
+ 		rdata.next = NULL;
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_CREATE_INDEX, &rdata);
+ 
+ 		page = BufferGetPage(meta);
+ 		PageSetLSN(page, recptr);
+ 	}
+ 
+ 	UnlockReleaseBuffer(meta);
+ 	END_CRIT_SECTION();
+ 
+ 	/* set up our "reverse map" */
+ 	mmRevmapCreate(index);
+ 
+ 	/*
+ 	 * Initialize our state, including the deformed tuple state.
+ 	 */
+ 	rmAccess = mmRevmapAccessInit(index);
+ 	mmstate = initialize_mm_buildstate(heap, index, rmAccess, indexInfo);
+ 
+ 	/*
+ 	 * Now scan the relation.  No syncscan allowed here because we want the
+ 	 * heap blocks in order.
+ 	 */
+ 	reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
+ 								   mmbuildCallback, (void *) mmstate);
+ 
+ 	/* XXX process the final batch, if needed */
+ 
+ 	/* release the last index buffer used */
+ 	if (!BufferIsInvalid(mmstate->currentInsertBuf))
+ 	{
+ 		ReleaseBuffer(mmstate->currentInsertBuf);
+ 		mmstate->currentInsertBuf = InvalidBuffer;
+ 	}
+ 
+ 	mmRevmapAccessTerminate(mmstate->rmAccess);
+ 
+ 	/*
+ 	 * Return statistics
+ 	 */
+ 	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+ 
+ 	result->heap_tuples = reltuples;
+ 	result->index_tuples = mmstate->numtuples;
+ 
+ 	PG_RETURN_POINTER(result);
+ }
+ 
+ Datum
+ mmbuildempty(PG_FUNCTION_ARGS)
+ {
+ 	ereport(ERROR,
+ 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 			 errmsg("unlogged MinMax indexes are not supported")));
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ 
+ /*
+  * qsort comparator for ItemPointerData items
+  */
+ static int
+ qsortCompareItemPointers(const void *a, const void *b)
+ {
+ 	return ItemPointerCompare((ItemPointer) a, (ItemPointer) b);
+ }
+ 
+ /*
+  * Remove index tuples that are no longer useful.
+  *
+  * While at it, return in nonsummed the array (and in numnonsummed its size) of
+  * block numbers for which the revmap returns InvalidTid; this is used in a
+  * later stage to execute re-summarization.  (Each block number returned
+  * corresponds to the heap page number at which the unsummarized range
+  * starts.)	Space for the array is palloc'ed, and must be freed by caller.
+  *
+  * idxRel is the index relation; heapNumBlocks is the size of the heap
+  * relation; strategy is appropriate for bulk scanning.
+  */
+ static void
+ remove_deletable_tuples(Relation idxRel, BlockNumber heapNumBlocks,
+ 						BufferAccessStrategy strategy,
+ 						BlockNumber **nonsummed, int *numnonsummed)
+ {
+ 	HASHCTL		hctl;
+ 	HTAB	   *tuples;
+ 	HASH_SEQ_STATUS status;
+ 	BlockNumber nblocks;
+ 	BlockNumber blk;
+ 	mmRevmapAccess *rmAccess;
+ 	BlockNumber heapBlk;
+ 	BlockNumber	pagesPerRange;
+ 	int			numitems = 0;
+ 	int			numdeletable = 0;
+ 	ItemPointerData *deletable;
+ 	int			start;
+ 	int			i;
+ 	BlockNumber *nonsumm = NULL;
+ 	int			maxnonsumm = 0;
+ 	int			numnonsumm = 0;
+ 
+ 	typedef struct DeletableTuple
+ 	{
+ 		ItemPointerData tid;
+ 		bool		referenced;
+ 	} DeletableTuple;
+ 
+ 	nblocks = RelationGetNumberOfBlocks(idxRel);
+ 
+ 	/* Initialize hash used to track deletable tuples */
+ 	memset(&hctl, 0, sizeof(hctl));
+ 	hctl.keysize = sizeof(ItemPointerData);
+ 	hctl.entrysize = sizeof(DeletableTuple);
+ 	hctl.hcxt = CurrentMemoryContext;
+ 	hctl.hash = tag_hash;
+ 
+ 	/* assume ten entries per page.  No harm in getting this wrong */
+ 	tuples = hash_create("mmvacuumcleanup", nblocks * 10, &hctl,
+ 						 HASH_CONTEXT | HASH_FUNCTION | HASH_ELEM);
+ 
+ 	/*
+ 	 * Scan the index sequentially, entering each item into a hash table.
+ 	 * Initially, the items are marked as not referenced.
+ 	 */
+ 	for (blk = 0; blk < nblocks; blk++)
+ 	{
+ 		Buffer		buf;
+ 		Page		page;
+ 		OffsetNumber offno;
+ 
+ 		vacuum_delay_point();
+ 
+ 		buf = ReadBufferExtended(idxRel, MAIN_FORKNUM, blk, RBM_NORMAL,
+ 								 strategy);
+ 		LockBuffer(buf, BUFFER_LOCK_SHARE);
+ 		page = BufferGetPage(buf);
+ 
+ 		for (offno = 1; offno <= PageGetMaxOffsetNumber(page); offno++)
+ 		{
+ 			ItemPointerData tid;
+ 			ItemId		itemid;
+ 			bool		found;
+ 			DeletableTuple *hitem;
+ 
+ 			itemid = PageGetItemId(page, offno);
+ 			if (!ItemIdHasStorage(itemid))
+ 				continue;
+ 
+ 			ItemPointerSet(&tid, blk, offno);
+ 			hitem = (DeletableTuple *)
+ 				hash_search(tuples, &tid, HASH_ENTER, &found);
+ 			Assert(!found);
+ 			hitem->referenced = false;
+ 			numitems++;
+ 		}
+ 		UnlockReleaseBuffer(buf);
+ 	}
+ 
+ 	/*
+ 	 * now scan the revmap, and determine which of these TIDs are still
+ 	 * referenced
+ 	 */
+ 	pagesPerRange = MinmaxGetPagesPerRange(idxRel);
+ 	rmAccess = mmRevmapAccessInit(idxRel);
+ 	for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange)
+ 	{
+ 		ItemPointerData itupptr;
+ 		DeletableTuple *hitem;
+ 		bool		found;
+ 
+ 		mmGetHeapBlockItemptr(rmAccess, heapBlk, &itupptr);
+ 
+ 		if (!ItemPointerIsValid(&itupptr))
+ 		{
+ 			/*
+ 			 * Ignore revmap entries set to invalid.  Before doing so, if the
+ 			 * heap page range is complete but not summarized, store its
+ 			 * initial page number in the unsummarized array, for later
+ 			 * summarization.
+ 			 */
+ 			if (heapBlk + pagesPerRange < heapNumBlocks)
+ 			{
+ 				if (maxnonsumm == 0)
+ 				{
+ 					Assert(!nonsumm);
+ 					maxnonsumm = 8;
+ 					nonsumm = palloc(sizeof(BlockNumber) * maxnonsumm);
+ 				}
+ 				else if (numnonsumm >= maxnonsumm)
+ 				{
+ 					maxnonsumm *= 2;
+ 					nonsumm = repalloc(nonsumm, sizeof(BlockNumber) * maxnonsumm);
+ 				}
+ 
+ 				nonsumm[numnonsumm++] = heapBlk;
+ 			}
+ 
+ 			continue;
+ 		}
+ 		else
+ 			UnlockTuple(idxRel, &itupptr, ShareLock);
+ 
+ 		hitem = (DeletableTuple *) hash_search(tuples,
+ 											   &itupptr,
+ 											   HASH_FIND,
+ 											   &found);
+ 		if (!found)
+ 			elog(ERROR, "reverse map references nonexistent index tuple %u/%u",
+ 				 ItemPointerGetBlockNumber(&itupptr),
+ 				 ItemPointerGetOffsetNumber(&itupptr));
+ 		hitem->referenced = true;
+ 
+ 		/* discount items set as referenced */
+ 		numitems--;
+ 	}
+ 	Assert(numitems >= 0);
+ 
+ 	mmRevmapAccessTerminate(rmAccess);
+ 
+ 	/*
+ 	 * Now scan the hash, and keep track of the removable (i.e. not referenced,
+ 	 * not locked) tuples.
+ 	 */
+ 	deletable = palloc(sizeof(ItemPointerData) * numitems);
+ 
+ 	hash_freeze(tuples);
+ 	hash_seq_init(&status, tuples);
+ 	for (;;)
+ 	{
+ 		DeletableTuple *hitem;
+ 
+ 		hitem = hash_seq_search(&status);
+ 		if (!hitem)
+ 			break;
+ 		if (hitem->referenced)
+ 			continue;
+ 		if (!ConditionalLockTuple(idxRel, &hitem->tid, ExclusiveLock))
+ 			continue;
+ 
+ 		/*
+ 		 * By here, we know this tuple is not referenced from the revmap.
+ 		 * Also, since we hold the tuple lock, we know that if there is a
+ 		 * concurrent scan that had obtained the tuple before the reference
+ 		 * got removed, either that scan is not looking at the tuple (because
+ 		 * that would have prevented us from getting the tuple lock) or it is
+ 		 * holding the containing buffer's lock.  If the former, then there's
+ 		 * no problem with removing the tuple immediately; if the latter, we
+ 		 * will block below trying to acquire that lock, so by the time we are
+ 		 * unblocked, the concurrent scan will no longer be interested in the
+ 		 * tuple contents anymore.	Therefore, this tuple can be removed from
+ 		 * the block.
+ 		 */
+ 		UnlockTuple(idxRel, &hitem->tid, ExclusiveLock);
+ 
+ 		deletable[numdeletable++] = hitem->tid;
+ 	}
+ 
+ 	/*
+ 	 * Now sort the array of deletable index tuples, and walk this array by
+ 	 * pages doing bulk deletion of items on each page; the free space map is
+ 	 * updated for pages on which we delete items.
+ 	 */
+ 	qsort(deletable, numdeletable, sizeof(ItemPointerData),
+ 		  qsortCompareItemPointers);
+ 
+ 	start = 0;
+ 	for (i = 0; i < numdeletable; i++)
+ 	{
+ 		if (i == numdeletable - 1 ||
+ 			(ItemPointerGetBlockNumber(&deletable[start]) !=
+ 			 ItemPointerGetBlockNumber(&deletable[i + 1])))
+ 		{
+ 			OffsetNumber *offnos;
+ 			int			noffs;
+ 			Buffer		buf;
+ 			Page		page;
+ 			int			j;
+ 			BlockNumber	blk;
+ 			int			freespace;
+ 
+ 			vacuum_delay_point();
+ 
+ 			blk = ItemPointerGetBlockNumber(&deletable[start]);
+ 			buf = ReadBufferExtended(idxRel, MAIN_FORKNUM, blk,
+ 									 RBM_NORMAL, strategy);
+ 			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 			page = BufferGetPage(buf);
+ 
+ 			noffs = i + 1 - start;
+ 			offnos = palloc(sizeof(OffsetNumber) * noffs);
+ 
+ 			for (j = 0; j < noffs; j++)
+ 				offnos[j] = ItemPointerGetOffsetNumber(&deletable[start + j]);
+ 
+ 			/*
+ 			 * Now remove the deletable tuples from the page.
+ 			 */
+ 			START_CRIT_SECTION();
+ 
+ 			PageIndexDeleteNoCompact(page, offnos, noffs);
+ 			MarkBufferDirty(buf);
+ 
+ 			/* XLOG stuff */
+ 			if (RelationNeedsWAL(idxRel))
+ 			{
+ 				xl_minmax_bulkremove	xlrec;
+ 				XLogRecPtr	recptr;
+ 				XLogRecData	rdata[2];
+ 
+ 				xlrec.node = idxRel->rd_node;
+ 				xlrec.block = blk;
+ 				rdata[0].data = (char *) &xlrec;
+ 				rdata[0].len = SizeOfMinmaxBulkRemove;
+ 				rdata[0].buffer = InvalidBuffer;
+ 				rdata[0].buffer_std = false;
+ 				rdata[0].next = &(rdata[1]);
+ 
+ 				/*
+ 				 * The OffsetNumber array is not actually in the buffer, but we
+ 				 * pretend that it is.  When XLogInsert stores the whole
+ 				 * buffer, the offset array need not be stored too.
+ 				 */
+ 				rdata[1].data = (char *) offnos;
+ 				rdata[1].len = sizeof(OffsetNumber) * noffs;
+ 				rdata[1].buffer = buf;
+ 				rdata[1].buffer_std = true;
+ 				rdata[1].next = NULL;
+ 
+ 				recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_BULKREMOVE,
+ 									rdata);
+ 
+ 				PageSetLSN(page, recptr);
+ 			}
+ 
+ 			END_CRIT_SECTION();
+ 
+ 			/* next iteration starts where this one ended */
+ 			start = i + 1;
+ 
+ 			/* remember free space while we have the buffer locked */
+ 			freespace = PageGetFreeSpace(page);
+ 
+ 			UnlockReleaseBuffer(buf);
+ 			pfree(offnos);
+ 
+ 			RecordPageWithFreeSpace(idxRel, blk, freespace);
+ 		}
+ 	}
+ 
+ 	pfree(deletable);
+ 
+ 	/* Finally, ensure the index's FSM is consistent */
+ 	FreeSpaceMapVacuum(idxRel);
+ 
+ 	*nonsummed = nonsumm;
+ 	*numnonsummed = numnonsumm;
+ 
+ 	hash_destroy(tuples);
+ }
+ 
+ /*
+  * Summarize the given page ranges of the given index.
+  */
+ static void
+ rerun_summarization(Relation idxRel, Relation heapRel, mmRevmapAccess *rmAccess,
+ 					BlockNumber *nonsummarized, int numnonsummarized)
+ {
+ 	int			i;
+ 	IndexInfo  *indexInfo;
+ 	MMBuildState *mmstate;
+ 	BlockNumber	pagesPerRange;
+ 
+ 	indexInfo = BuildIndexInfo(idxRel);
+ 	pagesPerRange = MinmaxGetPagesPerRange(idxRel);
+ 
+ 	mmstate = initialize_mm_buildstate(heapRel, idxRel, rmAccess, indexInfo);
+ 
+ 	for (i = 0; i < numnonsummarized; i++)
+ 	{
+ 		BlockNumber blk = nonsummarized[i];
+ 		ItemPointerData iptr;
+ 		MMTuple    *tup;
+ 		Size		size;
+ 
+ 		mmstate->currRangeStart = blk;
+ 		mmstate->nextRangeAt = blk + pagesPerRange;
+ 
+ 		mmGetHeapBlockItemptr(rmAccess, blk, &iptr);
+ 		/* it can't have been re-summarized concurrently .. */
+ 		Assert(!ItemPointerIsValid(&iptr));
+ 
+ 		IndexBuildHeapRangeScan(heapRel, idxRel, indexInfo, false,
+ 								blk, pagesPerRange,
+ 								mmbuildCallback, (void *) mmstate);
+ 
+ 		/*
+ 		 * Create the index tuple containing min/max values, and insert it.
+ 		 * Note mmbuildCallback didn't have the chance to actually insert
+ 		 * anything into the index, because the heapscan should have ended
+ 		 * just as it reached the final tuple in the range.
+ 		 */
+ 		tup = minmax_form_tuple(mmstate->indexDesc, mmstate->diskDesc,
+ 								mmstate->dtuple, &size);
+ 		mm_doinsert(mmstate->irel, mmstate->rmAccess,
+ 					&mmstate->currentInsertBuf, mmstate->currRangeStart, tup,
+ 					size);
+ 		mmstate->numtuples++;
+ 		pfree(tup);
+ 
+ 		clear_mm_percol_buildstate(mmstate);
+ 	}
+ 
+ 	if (!BufferIsInvalid(mmstate->currentInsertBuf))
+ 	{
+ 		ReleaseBuffer(mmstate->currentInsertBuf);
+ 		mmstate->currentInsertBuf = InvalidBuffer;
+ 	}
+ }
+ 
+ /*
+  * ambulkdelete
+  *		Since there are no per-heap-tuple index tuples, there's not a lot we
+  *		can do here.
+  *
+  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
+  * tuple is deleted), meaning the need to re-run summarization on the affected
+  * range.  We'd need to expand on-disk mmtuples with an extra flag for that,
+  * though.
+  */
+ Datum
+ mmbulkdelete(PG_FUNCTION_ARGS)
+ {
+ 	/* other arguments are not currently used */
+ 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ 
+ 	/* allocate stats if first time through, else re-use existing struct */
+ 	if (stats == NULL)
+ 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ 
+ 	PG_RETURN_POINTER(stats);
+ }
+ 
+ /*
+  * This routine is in charge of "vacuuming" a minmax index: 1) removing index
+  * tuples that are no longer referenced from the revmap, and 2) summarizing
+  * ranges that are currently unsummarized.
+  */
+ Datum
+ mmvacuumcleanup(PG_FUNCTION_ARGS)
+ {
+ 	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+ 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ 	mmRevmapAccess *rmAccess;
+ 	BlockNumber *nonsummarized = NULL;
+ 	int			numnonsummarized;
+ 	Relation	heapRel;
+ 	BlockNumber	heapNumBlocks;
+ 
+ 	/* No-op in ANALYZE ONLY mode */
+ 	if (info->analyze_only)
+ 		PG_RETURN_POINTER(stats);
+ 
+ 	rmAccess = mmRevmapAccessInit(info->index);
+ 
+ 	heapRel = heap_open(IndexGetRelation(RelationGetRelid(info->index), false),
+ 						AccessShareLock);
+ 
+ 	/*
+ 	 * First scan the index, removing index tuples that are no longer
+ 	 * referenced from the revmap.  While at it, collect the page numbers of
+ 	 * ranges that are not summarized.
+ 	 */
+ 	heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
+ 	remove_deletable_tuples(info->index, heapNumBlocks, info->strategy,
+ 							&nonsummarized, &numnonsummarized);
+ 
+ 	/* and summarize the ranges collected above */
+ 	if (nonsummarized)
+ 	{
+ 		rerun_summarization(info->index, heapRel, rmAccess,
+ 							nonsummarized, numnonsummarized);
+ 		pfree(nonsummarized);
+ 	}
+ 
+ 	mmRevmapAccessTerminate(rmAccess);
+ 	heap_close(heapRel, AccessShareLock);
+ 
+ 	PG_RETURN_POINTER(stats);
+ }
+ 
+ Datum
+ mmoptions(PG_FUNCTION_ARGS)
+ {
+ 	Datum		reloptions = PG_GETARG_DATUM(0);
+ 	bool		validate = PG_GETARG_BOOL(1);
+ 	relopt_value *options;
+ 	MinmaxOptions *rdopts;
+ 	int			numoptions;
+ 	static const relopt_parse_elt tab[] = {
+ 		{"pages_per_range", RELOPT_TYPE_INT, offsetof(MinmaxOptions, pagesPerRange)}
+ 	};
+ 
+ 	options = parseRelOptions(reloptions, validate, RELOPT_KIND_MINMAX,
+ 							  &numoptions);
+ 
+ 	/* if none set, we're done */
+ 	if (numoptions == 0)
+ 		PG_RETURN_NULL();
+ 
+ 	rdopts = allocateReloptStruct(sizeof(MinmaxOptions), options, numoptions);
+ 
+ 	fillRelOptions((void *) rdopts, sizeof(MinmaxOptions), options, numoptions,
+ 				   validate, tab, lengthof(tab));
+ 
+ 	pfree(options);
+ 
+ 	PG_RETURN_BYTEA_P(rdopts);
+ }
+ 
+ /*
+  * Fill the given finfo to enable calls to the operator specified by the given
+  * parameters.
+  */
+ static void
+ get_mm_operator(Oid opfam, Oid idxtypid, Oid keytypid,
+ 				StrategyNumber strategy, FmgrInfo *finfo)
+ {
+ 	Oid			oprid;
+ 	HeapTuple	oper;
+ 
+ 	oprid = get_opfamily_member(opfam, idxtypid, keytypid, strategy);
+ 	if (!OidIsValid(oprid))
+ 		elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+ 			 strategy, idxtypid, keytypid, opfam);
+ 
+ 	oper = SearchSysCache1(OPEROID, oprid);
+ 	if (!HeapTupleIsValid(oper))
+ 		elog(ERROR, "cache lookup failed for operator %u", oprid);
+ 
+ 	fmgr_info(((Form_pg_operator) GETSTRUCT(oper))->oprcode, finfo);
+ 	ReleaseSysCache(oper);
+ }
+ 
+ /*
+  * Invoke the given operator, and return the result as a C boolean.
+  */
+ static inline bool
+ invoke_mm_operator(FmgrInfo *operator, Oid collation, Datum left, Datum right)
+ {
+ 	Datum		result;
+ 
+ 	result = FunctionCall2Coll(operator, collation, left, right);
+ 
+ 	return DatumGetBool(result);
+ }
+ 
+ /*
+  * Insert an index tuple into the index relation.  The revmap is updated to
+  * mark the range containing the given page as pointing to the inserted entry.
+  *
+  * The buffer, if valid, is checked for free space to insert the new entry;
+  * if there isn't enough, a new buffer is obtained and pinned.
+  *
+  * The buffer is marked dirty.
+  */
+ static void
+ mm_doinsert(Relation idxrel, mmRevmapAccess *rmAccess, Buffer *buffer,
+ 			BlockNumber heapblkno, MMTuple *tup, Size itemsz)
+ {
+ 	Page		page;
+ 	BlockNumber blk;
+ 	OffsetNumber off;
+ 	bool		extended;
+ 
+ 	itemsz = MAXALIGN(itemsz);
+ 
+ 	extended = mm_getinsertbuffer(idxrel, buffer, itemsz);
+ 	page = BufferGetPage(*buffer);
+ 
+ 	if (PageGetFreeSpace(page) < itemsz)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ 				 errmsg("index row size %lu exceeds maximum for index \"%s\"",
+ 						(unsigned long) itemsz,
+ 						RelationGetRelationName(idxrel))));
+ 
+ 	START_CRIT_SECTION();
+ 	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
+ 					  false, false);
+ 	MarkBufferDirty(*buffer);
+ 
+ 	blk = BufferGetBlockNumber(*buffer);
+ 	MINMAX_elog(DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
+ 				blk, off, heapblkno);
+ 
+ 	/* XLOG stuff */
+ 	if (RelationNeedsWAL(idxrel))
+ 	{
+ 		xl_minmax_insert	xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata[2];
+ 		uint8		info = XLOG_MINMAX_INSERT;
+ 
+ 		xlrec.target.node = idxrel->rd_node;
+ 		ItemPointerSet(&xlrec.target.tid, blk, off);
+ 		xlrec.overwrite = false;
+ 		rdata[0].data = (char *) &xlrec;
+ 		rdata[0].len = SizeOfMinmaxInsert;
+ 		rdata[0].buffer = InvalidBuffer;
+ 		rdata[0].buffer_std = false;
+ 		rdata[0].next = &(rdata[1]);
+ 
+ 		rdata[1].data = (char *) tup;
+ 		rdata[1].len = itemsz;
+ 		rdata[1].buffer = *buffer;
+ 		rdata[1].buffer_std = true;
+ 		rdata[1].next = NULL;
+ 
+ 		/*
+ 		 * If this is the first tuple in the page, we can reinit the page
+ 		 * instead of restoring the whole thing.  Set flag, and hide buffer
+ 		 * references from XLogInsert.
+ 		 */
+ 		if (extended)
+ 		{
+ 			info |= XLOG_MINMAX_INIT_PAGE;
+ 			rdata[1].buffer = InvalidBuffer;
+ 		}
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 		PageSetLSN(page, recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	/*
+ 	 * Note we need to keep the lock on the buffer until after the revmap
+ 	 * has been updated.  Otherwise, a concurrent scanner could try to obtain
+ 	 * the index tuple from the revmap before we're done writing it.
+ 	 */
+ 	mmSetHeapBlockItemptr(rmAccess, heapblkno, blk, off);
+ 
+ 	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ }
+ 
+ /*
+  * Return an exclusively-locked buffer resulting from extending the relation.
+  */
+ Buffer
+ mm_getnewbuffer(Relation irel)
+ {
+ 	Buffer	buffer;
+ 	bool	needLock = !RELATION_IS_LOCAL(irel);
+ 
+ 	/* FIXME need to request a MaxFSMRequestSize page from the FSM here */
+ 
+ 	if (needLock)
+ 		LockRelationForExtension(irel, ExclusiveLock);
+ 
+ 	buffer = ReadBuffer(irel, P_NEW);
+ 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 	MINMAX_elog(DEBUG2, "mm_getnewbuffer: extending to page %u",
+ 				BufferGetBlockNumber(buffer));
+ 
+ 	if (needLock)
+ 		UnlockRelationForExtension(irel, ExclusiveLock);
+ 
+ 	return buffer;
+ }
+ 
+ /*
+  * Return a pinned and locked buffer which can be used to insert an index item
+  * of size itemsz.
+  *
+  * The passed buffer argument is tested for free space; if it has enough, it is
+  * locked and returned.  Otherwise, that buffer (if valid) is unpinned, a new
+  * buffer is obtained, and returned pinned and locked.
+  *
+  * If there's no existing page with enough free space to accommodate the new
+  * item,
+  * the relation is extended.  This function returns true if this happens, false
+  * otherwise.
+  */
+ static bool
+ mm_getinsertbuffer(Relation irel, Buffer *buffer, Size itemsz)
+ {
+ 	Buffer		buf;
+ 	bool		extended = false;
+ 
+ 	buf = *buffer;
+ 
+ 	if (BufferIsInvalid(buf) ||
+ 		(PageGetFreeSpace(BufferGetPage(buf)) < itemsz))
+ 	{
+ 		Page		page;
+ 
+ 		/*
+ 		 * By the time we break out of this loop, buf is a locked and pinned
+ 		 * buffer which has enough free space to satisfy the requirement.
+ 		 */
+ 		for (;;)
+ 		{
+ 			BlockNumber	blk;
+ 			int			freespace;
+ 
+ 			blk = GetPageWithFreeSpace(irel, itemsz);
+ 			if (blk == InvalidBlockNumber)
+ 			{
+ 				/*
+ 				 * There's not enough free space in any existing index page,
+ 				 * according to the FSM: extend the relation to obtain a shiny
+ 				 * new page.
+ 				 */
+ 				buf = mm_getnewbuffer(irel);
+ 				page = BufferGetPage(buf);
+ 				mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
+ 
+ 				/*
+ 				 * If an entirely new page does not contain enough free space
+ 				 * for the new item, then surely that item is oversized.
+ 				 * Complain loudly; but first make sure we record the page as
+ 				 * free, for next time.
+ 				 */
+ 				freespace = PageGetFreeSpace(page);
+ 				RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
+ 										freespace);
+ 				if (freespace < itemsz)
+ 					ereport(ERROR,
+ 							(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ 							 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ 									(unsigned long) itemsz,
+ 									(unsigned long) freespace,
+ 									RelationGetRelationName(irel))));
+ 				extended = true;
+ 				break;
+ 			}
+ 
+ 			Assert(blk != InvalidBlockNumber);
+ 			buf = ReadBuffer(irel, blk);
+ 			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 			page = BufferGetPage(buf);
+ 			freespace = PageGetFreeSpace(page);
+ 			if (freespace >= itemsz)
+ 				break;
+ 
+ 			/* Not really enough space: register reality and start over */
+ 			UnlockReleaseBuffer(buf);
+ 			RecordPageWithFreeSpace(irel, blk, freespace);
+ 		}
+ 
+ 		if (!BufferIsInvalid(*buffer))
+ 			ReleaseBuffer(*buffer);
+ 		*buffer = buf;
+ 	}
+ 	else
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 	/*
+ 	 * XXX we could perhaps avoid this if we used RelationSetTargetBlock ...
+ 	 */
+ 	if (extended)
+ 		FreeSpaceMapVacuum(irel);
+ 
+ 	return extended;
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmrevmap.c
***************
*** 0 ****
--- 1,679 ----
+ /*
+  * mmrevmap.c
+  *		Reverse range map for MinMax indexes
+  *
+  * The reverse range map (revmap) is a translation structure for minmax
+  * indexes: for each page range, there is one most-up-to-date summary tuple,
+  * and its location is tracked by the revmap.  Whenever a new tuple is inserted
+  * into a table that violates the previously recorded min/max values, a new
+  * tuple is inserted into the index and the revmap is updated to point to it.
+  *
+  * The pages of the revmap are interspersed in the index's main fork.  The
+  * first revmap page is always the index's page number one (that is,
+  * immediately after the metapage).  Subsequent revmap pages are allocated as
+  * they are needed; their locations are tracked by "array pages".  The metapage
+  * contains a large BlockNumber array whose elements point to array pages.  Thus,
+  * to find the second revmap page, we read the metapage and obtain the block
+  * number of the first array page; we then read that page, and the first
+  * element in it is the revmap page we're looking for.
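+  *
+  * For example (assuming, for illustration, a capacity of 2036 block numbers
+  * per array page; the real figure is ARRAY_REVMAP_PAGE_MAXITEMS and depends
+  * on BLCKSZ): revmap logical page 0 is always physical block 1, while revmap
+  * logical page 3 is found at index 2 of the first array page listed in the
+  * metapage.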
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmrevmap.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/heapam_xlog.h"
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_xlog.h"
+ #include "access/rmgr.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/lmgr.h"
+ #include "storage/relfilenode.h"
+ #include "storage/smgr.h"
+ #include "utils/memutils.h"
+ 
+ 
+ 
+ /*
+  * In regular revmap pages, each item stores an ItemPointerData.  These defines
+  * let one find the logical revmap page number and index number of the revmap
+  * item for the given heap block number.
+  */
+ #define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
+ 	((heapBlk / pagesPerRange) / REGULAR_REVMAP_PAGE_MAXITEMS)
+ #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
+ 	((heapBlk / pagesPerRange) % REGULAR_REVMAP_PAGE_MAXITEMS)
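+ /*
+  * For example, with pagesPerRange = 128 and (say) 1360 items per regular
+  * revmap page (the real figure is REGULAR_REVMAP_PAGE_MAXITEMS, which
+  * depends on BLCKSZ), heap block 200000 belongs to range 200000 / 128 = 1562,
+  * which is tracked in revmap logical page 1562 / 1360 = 1, at index
+  * 1562 % 1360 = 202.
+  */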
+ 
+ /*
+  * In array revmap pages, each item stores a BlockNumber.  These defines let
+  * one find the page and index number of a given revmap block number.  Note
+  * that the first revmap page (revmap logical page number 0) is always stored
+  * in physical block number 1, so array pages do not store that one.
+  */
+ #define MAPBLK_TO_RMARRAY_BLK(rmBlk)	((rmBlk - 1) / ARRAY_REVMAP_PAGE_MAXITEMS)
+ #define MAPBLK_TO_RMARRAY_INDEX(rmBlk)	((rmBlk - 1) % ARRAY_REVMAP_PAGE_MAXITEMS)
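+ /*
+  * For example, revmap logical page 1 (the second revmap page) is tracked by
+  * array page 0 at index 0; with (say) 2036 entries per array page, revmap
+  * logical page 2037 is tracked by array page 1 at index 0.
+  */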
+ 
+ 
+ struct mmRevmapAccess
+ {
+ 	Relation	idxrel;
+ 	BlockNumber pagesPerRange;
+ 	Buffer		currBuf;
+ 	Buffer		currArrayBuf;
+ 	BlockNumber *revmapArrayPages;
+ };
+ /* typedef appears in minmax_revmap.h */
+ 
+ 
+ /*
+  * Initialize an access object for a reverse range map.  This must be freed
+  * by mmRevmapAccessTerminate when the caller is done with it.
+  */
+ mmRevmapAccess *
+ mmRevmapAccessInit(Relation idxrel)
+ {
+ 	mmRevmapAccess *rmAccess = palloc(sizeof(mmRevmapAccess));
+ 
+ 	rmAccess->idxrel = idxrel;
+ 	rmAccess->pagesPerRange = MinmaxGetPagesPerRange(idxrel);
+ 	rmAccess->currBuf = InvalidBuffer;
+ 	rmAccess->currArrayBuf = InvalidBuffer;
+ 	rmAccess->revmapArrayPages = NULL;
+ 
+ 	return rmAccess;
+ }
+ 
+ /*
+  * Release resources associated with a revmap access object.
+  */
+ void
+ mmRevmapAccessTerminate(mmRevmapAccess *rmAccess)
+ {
+ 	if (rmAccess->revmapArrayPages != NULL)
+ 		pfree(rmAccess->revmapArrayPages);
+ 	if (rmAccess->currBuf != InvalidBuffer)
+ 		ReleaseBuffer(rmAccess->currBuf);
+ 	if (rmAccess->currArrayBuf != InvalidBuffer)
+ 		ReleaseBuffer(rmAccess->currArrayBuf);
+ 	pfree(rmAccess);
+ }
+ 
+ /*
+  * In the given revmap page, which is used in a minmax index of pagesPerRange
+  * pages per range, set the element corresponding to heap block number heapBlk
+  * to the value (blkno, offno).
+  *
+  * Caller must have obtained the correct revmap page.
+  *
+  * This is used both in regular operation and during WAL replay.
+  */
+ void
+ rm_page_set_iptr(Page page, int pagesPerRange, BlockNumber heapBlk,
+ 				 BlockNumber blkno, OffsetNumber offno)
+ {
+ 	RevmapContents *contents;
+ 	ItemPointerData *iptr;
+ 
+ 	contents = (RevmapContents *) PageGetContents(page);
+ 	iptr = (ItemPointerData *) contents->rmr_tids;
+ 	iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
+ 
+ 	ItemPointerSet(iptr, blkno, offno);
+ }
+ 
+ /*
+  * Initialize a new regular revmap page, which stores the given revmap logical
+  * page number.  The physical block number of the page is returned.
+  *
+  * Used both by regular code path as well as during xlog replay.
+  */
+ BlockNumber
+ initialize_rmr_page(Buffer newbuf, BlockNumber mapBlk)
+ {
+ 	BlockNumber	blkno;
+ 	Page		page;
+ 	RevmapContents *contents;
+ 
+ 	page = BufferGetPage(newbuf);
+ 
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 	contents = (RevmapContents *) PageGetContents(page);
+ 	contents->rmr_logblk = mapBlk;
+ 	/* the rmr_tids array is initialized to all invalid by PageInit */
+ 
+ 	blkno = BufferGetBlockNumber(newbuf);
+ 
+ 	return blkno;
+ }
+ 
+ /*
+  * Read the metapage, lock it as specified by caller, and update the given
+  * rmAccess with the metapage data.  Return value is the locked buffer, which
+  * must be unlocked and released by caller.
+  */
+ static Buffer
+ rmaccess_get_metapage(mmRevmapAccess *rmAccess, int lockmode)
+ {
+ 	Buffer		meta;
+ 	MinmaxMetaPageData *metadata;
+ 	MinmaxSpecialSpace *special PG_USED_FOR_ASSERTS_ONLY;
+ 	Page		metapage;
+ 
+ 	meta = ReadBuffer(rmAccess->idxrel, MINMAX_METAPAGE_BLKNO);
+ 	LockBuffer(meta, lockmode);
+ 
+ 	metapage = BufferGetPage(meta);
+ 
+ #ifdef USE_ASSERT_CHECKING
+ 	/* ensure we really got the metapage */
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(metapage);
+ 	Assert(special->type == MINMAX_PAGETYPE_META);
+ #endif
+ 
+ 	/* first time through? allocate the array */
+ 	if (rmAccess->revmapArrayPages == NULL)
+ 		rmAccess->revmapArrayPages =
+ 			palloc(sizeof(BlockNumber) * MAX_REVMAP_ARRAYPAGES);
+ 
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(metapage);
+ 	memcpy(rmAccess->revmapArrayPages, metadata->revmapArrayPages,
+ 		   sizeof(BlockNumber) * MAX_REVMAP_ARRAYPAGES);
+ 
+ 	return meta;
+ }
+ 
+ /*
+  * Given a buffer (hopefully containing a blank page), set it up as a revmap
+  * array page.
+  *
+  * Used both by regular code path as well as during xlog replay.
+  */
+ void
+ initialize_rma_page(Buffer buf)
+ {
+ 	Page	arrayPg;
+ 	RevmapArrayContents *contents;
+ 
+ 	arrayPg = BufferGetPage(buf);
+ 	mm_page_init(arrayPg, MINMAX_PAGETYPE_REVMAP_ARRAY);
+ 	contents = (RevmapArrayContents *) PageGetContents(arrayPg);
+ 	contents->rma_nblocks = 0;
+ 	/* set the whole array to InvalidBlockNumber */
+ 	memset(contents->rma_blocks, 0xFF,
+ 		   sizeof(BlockNumber) * ARRAY_REVMAP_PAGE_MAXITEMS);
+ }
+ 
+ /*
+  * Update the metapage, so that item arrayBlkIdx in the array of revmap array
+  * pages points to block number newPgBlkno.
+  */
+ static void
+ update_minmax_metapg(Relation idxrel, Buffer meta, uint32 arrayBlkIdx,
+ 					 BlockNumber newPgBlkno)
+ {
+ 	MinmaxMetaPageData *metadata;
+ 
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(BufferGetPage(meta));
+ 
+ 	START_CRIT_SECTION();
+ 	metadata->revmapArrayPages[arrayBlkIdx] = newPgBlkno;
+ 	MarkBufferDirty(meta);
+ 	if (RelationNeedsWAL(idxrel))
+ 	{
+ 		xl_minmax_metapg_set	xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata;
+ 
+ 		xlrec.node = idxrel->rd_node;
+ 		xlrec.blkidx = arrayBlkIdx;
+ 		xlrec.newpg = newPgBlkno;
+ 
+ 		rdata.data = (char *) &xlrec;
+ 		rdata.len = SizeOfMinmaxMetapgSet;
+ 		rdata.buffer = InvalidBuffer;
+ 		rdata.buffer_std = false;
+ 		rdata.next = NULL;
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_METAPG_SET, &rdata);
+ 		PageSetLSN(BufferGetPage(meta), recptr);
+ 	}
+ 	END_CRIT_SECTION();
+ }
+ 
+ /*
+  * Given a logical revmap block number, find its physical block number.
+  *
+  * Note this might involve up to two buffer reads, including a possible
+  * update to the metapage.
+  *
+  * If extend is set to true, and the page hasn't been set yet, extend the
+  * array to point to a newly allocated page.
+  */
+ static BlockNumber
+ rm_get_phys_blkno(mmRevmapAccess *rmAccess, BlockNumber mapBlk, bool extend)
+ {
+ 	int		arrayBlkIdx;
+ 	BlockNumber arrayBlk;
+ 	RevmapArrayContents *contents;
+ 	int		revmapIdx;
+ 	BlockNumber targetblk;
+ 
+ 	/* the first revmap page is always block number 1 */
+ 	if (mapBlk == 0)
+ 		return (BlockNumber) 1;
+ 
+ 	/*
+ 	 * For all other cases, take the long route of checking the metapage and
+ 	 * revmap array pages.
+ 	 */
+ 
+ 	/*
+ 	 * Copy the revmap array from the metapage into private storage, if not
+ 	 * done already in this scan.
+ 	 */
+ 	if (rmAccess->revmapArrayPages == NULL)
+ 	{
+ 		Buffer	meta;
+ 
+ 		meta = rmaccess_get_metapage(rmAccess, BUFFER_LOCK_SHARE);
+ 		UnlockReleaseBuffer(meta);
+ 	}
+ 
+ 	/*
+ 	 * Consult the metapage array; if the array page we need is not set there,
+ 	 * we need to extend the index to allocate the array page, and update the
+ 	 * metapage array.
+ 	 */
+ 	arrayBlkIdx = MAPBLK_TO_RMARRAY_BLK(mapBlk);
+ 	if (arrayBlkIdx >= MAX_REVMAP_ARRAYPAGES)
+ 		elog(ERROR, "non-existent revmap array page requested");
+ 
+ 	arrayBlk = rmAccess->revmapArrayPages[arrayBlkIdx];
+ 	if (arrayBlk == InvalidBlockNumber)
+ 	{
+ 		Buffer			meta;
+ 
+ 		/* if not asked to extend, there's no further work to do here */
+ 		if (!extend)
+ 			return InvalidBlockNumber;
+ 
+ 		/*
+ 		 * If we need to create a new array page, check the metapage again;
+ 		 * someone might have created it after the last time we read the
+ 		 * metapage.  This time we acquire an exclusive lock, since we may need
+ 		 * to extend.  Lock before doing the physical relation extension, to
+ 		 * avoid leaving an unused page around in case someone does this
+ 		 * concurrently.  Note that, unfortunately, we will be keeping the lock
+ 		 * on the metapage alongside the relation extension lock, while doing a
+ 		 * syscall involving disk I/O.  Extending to add a new revmap array page
+ 		 * is fairly infrequent, so it shouldn't be too bad.
+ 		 *
+ 		 * XXX it is possible to extend the relation unconditionally before
+ 		 * locking the metapage, and later if we find that someone else had
+ 		 * already added this page, save the page in FSM as MaxFSMRequestSize.
+ 		 * That would be better for concurrency.  Explore someday.
+ 		 */
+ 		meta = rmaccess_get_metapage(rmAccess, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 		if (rmAccess->revmapArrayPages[arrayBlkIdx] == InvalidBlockNumber)
+ 		{
+ 			BlockNumber	newPgBlkno;
+ 
+ 			/*
+ 			 * Ok, definitely need to allocate a new revmap array page;
+ 			 * initialize a new page to the initial (empty) array revmap state
+ 			 * and register it in metapage.
+ 			 */
+ 			rmAccess->currArrayBuf = mm_getnewbuffer(rmAccess->idxrel);
+ 			START_CRIT_SECTION();
+ 			initialize_rma_page(rmAccess->currArrayBuf);
+ 			MarkBufferDirty(rmAccess->currArrayBuf);
+ 			if (RelationNeedsWAL(rmAccess->idxrel))
+ 			{
+ 				xl_minmax_init_rmpg	xlrec;
+ 				XLogRecPtr		recptr;
+ 				XLogRecData		rdata;
+ 
+ 				xlrec.node = rmAccess->idxrel->rd_node;
+ 				xlrec.blkno = BufferGetBlockNumber(rmAccess->currArrayBuf);
+ 				xlrec.array = true;
+ 				xlrec.logblk = InvalidBlockNumber;
+ 
+ 				rdata.data = (char *) &xlrec;
+ 				rdata.len = SizeOfMinmaxInitRmpg;
+ 				rdata.buffer = InvalidBuffer;	/* FIXME */
+ 				rdata.buffer_std = false;
+ 				rdata.next = NULL;
+ 
+ 				recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_INIT_RMPG, &rdata);
+ 				PageSetLSN(BufferGetPage(rmAccess->currArrayBuf), recptr);
+ 			}
+ 			END_CRIT_SECTION();
+ 			LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_UNLOCK);
+ 			newPgBlkno = BufferGetBlockNumber(rmAccess->currArrayBuf);
+ 			rmAccess->revmapArrayPages[arrayBlkIdx] = newPgBlkno;
+ 
+ 			MINMAX_elog(DEBUG2, "allocated block for revmap array page: %u",
+ 				 BufferGetBlockNumber(rmAccess->currArrayBuf));
+ 
+ 			/* Update the metapage to point to the new array page. */
+ 			update_minmax_metapg(rmAccess->idxrel, meta, arrayBlkIdx,
+ 								 newPgBlkno);
+ 		}
+ 
+ 		UnlockReleaseBuffer(meta);
+ 		arrayBlk = rmAccess->revmapArrayPages[arrayBlkIdx];
+ 	}
+ 
+ 	/*
+ 	 * By here, we know the array page is set in the metapage array.  Read that
+ 	 * page; except that if we just allocated it, or we already hold pin on it,
+ 	 * we don't need to read it again.  XXX but we didn't hold lock!
+ 	 */
+ 	Assert(arrayBlk != InvalidBlockNumber);
+ 
+ 	if (rmAccess->currArrayBuf == InvalidBuffer ||
+ 		BufferGetBlockNumber(rmAccess->currArrayBuf) != arrayBlk)
+ 	{
+ 		if (rmAccess->currArrayBuf != InvalidBuffer)
+ 			ReleaseBuffer(rmAccess->currArrayBuf);
+ 
+ 		rmAccess->currArrayBuf =
+ 			ReadBuffer(rmAccess->idxrel, arrayBlk);
+ 	}
+ 
+ 	LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_SHARE);
+ 
+ 	/*
+ 	 * And now we can inspect its contents; if the target page is set, we can
+ 	 * just return.  Even if not set, we can also return if caller asked us not
+ 	 * to extend the revmap.
+ 	 */
+ 	contents = (RevmapArrayContents *)
+ 		PageGetContents(BufferGetPage(rmAccess->currArrayBuf));
+ 	revmapIdx = MAPBLK_TO_RMARRAY_INDEX(mapBlk);
+ 	if (!extend || revmapIdx <= contents->rma_nblocks - 1)
+ 	{
+ 		LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_UNLOCK);
+ 
+ 		return contents->rma_blocks[revmapIdx];
+ 	}
+ 
+ 	/*
+ 	 * Trade our shared lock in the array page for exclusive, because we now
+ 	 * need to allocate one more revmap page and modify the array page.
+ 	 */
+ 	LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_UNLOCK);
+ 	LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 	contents = (RevmapArrayContents *)
+ 		PageGetContents(BufferGetPage(rmAccess->currArrayBuf));
+ 
+ 	/*
+ 	 * If someone else already set the value while we were waiting for the
+ 	 * exclusive lock, we're done; otherwise, allocate a new block as the
+ 	 * new revmap page, and update the array page to point to it.
+ 	 *
+ 	 * FIXME -- what if we were asked not to extend?
+ 	 */
+ 	if (contents->rma_blocks[revmapIdx] != InvalidBlockNumber)
+ 	{
+ 		targetblk = contents->rma_blocks[revmapIdx];
+ 	}
+ 	else
+ 	{
+ 		Buffer		newbuf;
+ 
+ 		newbuf = mm_getnewbuffer(rmAccess->idxrel);
+ 		START_CRIT_SECTION();
+ 		targetblk = initialize_rmr_page(newbuf, mapBlk);
+ 		MarkBufferDirty(newbuf);
+ 		if (RelationNeedsWAL(rmAccess->idxrel))
+ 		{
+ 			xl_minmax_init_rmpg	xlrec;
+ 			XLogRecPtr	recptr;
+ 			XLogRecData	rdata;
+ 
+ 			xlrec.node = rmAccess->idxrel->rd_node;
+ 			xlrec.blkno = BufferGetBlockNumber(newbuf);
+ 			xlrec.array = false;
+ 			xlrec.logblk = mapBlk;
+ 
+ 			rdata.data = (char *) &xlrec;
+ 			rdata.len = SizeOfMinmaxInitRmpg;
+ 			rdata.buffer = InvalidBuffer;
+ 			rdata.buffer_std = false;
+ 			rdata.next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_INIT_RMPG, &rdata);
+ 			PageSetLSN(BufferGetPage(newbuf), recptr);
+ 		}
+ 		END_CRIT_SECTION();
+ 
+ 		UnlockReleaseBuffer(newbuf);
+ 
+ 		/*
+ 		 * Modify the revmap array page to point to the newly allocated revmap
+ 		 * page.
+ 		 */
+ 		START_CRIT_SECTION();
+ 
+ 		contents->rma_blocks[revmapIdx] = targetblk;
+ 		/*
+ 		 * XXX this rma_nblocks assignment should probably be conditional on the
+ 		 * current rma_blocks value.
+ 		 */
+ 		contents->rma_nblocks = revmapIdx + 1;
+ 		MarkBufferDirty(rmAccess->currArrayBuf);
+ 
+ 		/* XLOG stuff */
+ 		if (RelationNeedsWAL(rmAccess->idxrel))
+ 		{
+ 			xl_minmax_rmarray_set	xlrec;
+ 			XLogRecPtr		recptr;
+ 			XLogRecData		rdata[2];
+ 			uint8			info;
+ 
+ 			info = XLOG_MINMAX_RMARRAY_SET;
+ 
+ 			xlrec.node = rmAccess->idxrel->rd_node;
+ 			xlrec.rmarray = BufferGetBlockNumber(rmAccess->currArrayBuf);
+ 			xlrec.blkidx = revmapIdx;
+ 			xlrec.newpg = targetblk;
+ 
+ 			rdata[0].data = (char *) &xlrec;
+ 			rdata[0].len = SizeOfMinmaxRmarraySet;
+ 			rdata[0].buffer = InvalidBuffer;
+ 			rdata[0].buffer_std = false;
+ 			rdata[0].next = &rdata[1];
+ 
+ 			rdata[1].data = NULL;
+ 			rdata[1].len = 0;
+ 			rdata[1].buffer = rmAccess->currArrayBuf;
+ 			rdata[1].buffer_std = false;
+ 			rdata[1].next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 			PageSetLSN(BufferGetPage(rmAccess->currArrayBuf), recptr);
+ 		}
+ 
+ 		END_CRIT_SECTION();
+ 	}
+ 
+ 	LockBuffer(rmAccess->currArrayBuf, BUFFER_LOCK_UNLOCK);
+ 
+ 	return targetblk;
+ }
+ 
+ /*
+  * Set the TID of the index entry corresponding to the range that includes
+  * the given heap page to the given item pointer.
+  *
+  * The map is extended, if necessary.
+  */
+ void
+ mmSetHeapBlockItemptr(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 					  BlockNumber blkno, OffsetNumber offno)
+ {
+ 	BlockNumber mapBlk;
+ 	bool		extend = false;
+ 
+ 	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
+ 
+ 	/* Translate the map block number to physical location */
+ 	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, true);
+ 
+ 	MINMAX_elog(DEBUG2, "setting %u/%u in logical page %u (physical %u) for heap %u",
+ 				blkno, offno,
+ 				HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk),
+ 				mapBlk, heapBlk);
+ 
+ 	/*
+ 	 * Obtain the buffer from which we need to read.  If we already have the
+ 	 * correct buffer in our access struct, use that; otherwise release it
+ 	 * (if valid) and read the one we need.
+ 	 */
+ 	if (rmAccess->currBuf == InvalidBuffer ||
+ 		mapBlk != BufferGetBlockNumber(rmAccess->currBuf))
+ 	{
+ 		if (rmAccess->currBuf != InvalidBuffer)
+ 			ReleaseBuffer(rmAccess->currBuf);
+ 
+ 		Assert(mapBlk != InvalidBlockNumber);
+ 		rmAccess->currBuf = ReadBuffer(rmAccess->idxrel, mapBlk);
+ 	}
+ 
+ 	LockBuffer(rmAccess->currBuf, BUFFER_LOCK_EXCLUSIVE);
+ 	START_CRIT_SECTION();
+ 
+ 	rm_page_set_iptr(BufferGetPage(rmAccess->currBuf),
+ 					 rmAccess->pagesPerRange,
+ 					 heapBlk,
+ 					 blkno, offno);
+ 
+ 	MarkBufferDirty(rmAccess->currBuf);
+ 
+ 	/* XLOG stuff */
+ 	if (RelationNeedsWAL(rmAccess->idxrel))
+ 	{
+ 		xl_minmax_rm_set	xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata[2];
+ 		uint8		info;
+ 
+ 		info = XLOG_MINMAX_REVMAP_SET;
+ 
+ 		xlrec.node = rmAccess->idxrel->rd_node;
+ 		xlrec.mapBlock = mapBlk;
+ 		xlrec.pagesPerRange = rmAccess->pagesPerRange;
+ 		xlrec.heapBlock = heapBlk;
+ 		ItemPointerSet(&(xlrec.newval), blkno, offno);
+ 
+ 		rdata[0].data = (char *) &xlrec;
+ 		rdata[0].len = SizeOfMinmaxRevmapSet;
+ 		rdata[0].buffer = InvalidBuffer;
+ 		rdata[0].buffer_std = false;
+ 		rdata[0].next = &(rdata[1]);
+ 
+ 		rdata[1].data = NULL;
+ 		rdata[1].len = 0;
+ 		rdata[1].buffer = rmAccess->currBuf;
+ 		rdata[1].buffer_std = false;
+ 		rdata[1].next = NULL;
+ 
+ 		if (extend)
+ 		{
+ 			info |= XLOG_MINMAX_INIT_PAGE;
+ 			/* If the page is new, there's no need for a full page image */
+ 			rdata[0].next = NULL;
+ 		}
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 		PageSetLSN(BufferGetPage(rmAccess->currBuf), recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK);
+ }
+ 
+ 
+ /*
+  * Return the TID of the index entry corresponding to the range that includes
+  * the given heap page.  If the TID is valid, the tuple is locked with
+  * LockTuple.  It is the caller's responsibility to release that lock.
+  */
+ void
+ mmGetHeapBlockItemptr(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 					  ItemPointerData *out)
+ {
+ 	BlockNumber mapBlk;
+ 	RevmapContents *contents;
+ 	ItemPointerData *iptr;
+ 
+ 	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
+ 	/* Translate the map block number to physical location */
+ 	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, false);
+ 	if (mapBlk == InvalidBlockNumber)
+ 	{
+ 		ItemPointerSetInvalid(out);
+ 		return;
+ 	}
+ 
+ 	if (rmAccess->currBuf == InvalidBuffer ||
+ 		BufferGetBlockNumber(rmAccess->currBuf) != mapBlk)
+ 	{
+ 		if (rmAccess->currBuf != InvalidBuffer)
+ 			ReleaseBuffer(rmAccess->currBuf);
+ 
+ 		Assert(mapBlk != InvalidBlockNumber);
+ 		rmAccess->currBuf = ReadBuffer(rmAccess->idxrel, mapBlk);
+ 	}
+ 
+ 	LockBuffer(rmAccess->currBuf, BUFFER_LOCK_SHARE);
+ 
+ 	contents = (RevmapContents *)
+ 		PageGetContents(BufferGetPage(rmAccess->currBuf));
+ 	iptr = contents->rmr_tids;
+ 	iptr += HEAPBLK_TO_REVMAP_INDEX(rmAccess->pagesPerRange, heapBlk);
+ 
+ 	ItemPointerCopy(iptr, out);
+ 
+ 	if (ItemPointerIsValid(iptr))
+ 		LockTuple(rmAccess->idxrel, iptr, ShareLock);
+ 
+ 	LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK);
+ }
+ 
+ /*
+  * Initialize the revmap of a new minmax index.
+  *
+  * NB -- caller is assumed to WAL-log this operation
+  */
+ void
+ mmRevmapCreate(Relation idxrel)
+ {
+ 	Buffer		buf;
+ 
+ 	/*
+ 	 * The first page of the revmap is always stored in block number 1 of the
+ 	 * main fork.  Because of this, the only thing we need to do is request
+ 	 * a new page; we assume we are called immediately after the metapage has
+ 	 * been initialized.
+ 	 */
+ 	buf = mm_getnewbuffer(idxrel);
+ 	Assert(BufferGetBlockNumber(buf) == 1);
+ 
+ 	mm_page_init(BufferGetPage(buf), MINMAX_PAGETYPE_REVMAP);
+ 	MarkBufferDirty(buf);
+ 
+ 	UnlockReleaseBuffer(buf);
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmtuple.c
***************
*** 0 ****
--- 1,388 ----
+ /*
+  * MinMax-specific tuples
+  *		Method implementations for tuples in minmax indexes.
+  *
+  * The intended interface is that code outside this file only deals with
+  * DeformedMMTuples, and convert to and from the on-disk representation by
+  * using functions in this file.
+  *
+  * NOTES
+  *
+  * A minmax tuple is similar to a heap tuple, with a few key differences.  The
+  * first interesting difference is that the tuple header is much simpler, only
+  * containing its total length and a small area for flags.	Also, the stored
+  * data does not match the tuple descriptor exactly: for each attribute in the
+  * descriptor, the index tuple carries two values, one for the minimum value in
+  * that column and one for the maximum.
+  *
+  * Also, for each column there are two null bits: one (hasnulls) stores whether
+  * any tuple within the page range has that column set to null; the other
+  * (allnulls) stores whether the column values are all null.  If allnulls is
+  * true, then the tuple data area does not contain min/max values for that
+  * column at all; whereas it does if only hasnulls is set.  Note we always store
+  * a double-length null bitmask; for typical indexes of four columns or less,
+  * they take a single byte anyway.	It doesn't seem worth trying to optimize
+  * this further.
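+  *
+  * For example, a two-column index uses four null-bitmask bits: the first two
+  * are the "allnulls" bits for columns 1 and 2, and the next two are the
+  * corresponding "hasnulls" bits.  As in heap tuples, a set bit means the
+  * condition does NOT hold for that column.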
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmtuple.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
+ #include "access/minmax_tuple.h"
+ #include "access/tupdesc.h"
+ #include "access/tupmacs.h"
+ 
+ 
+ static inline void mm_deconstruct_tuple(char *tp, bits8 *nullbits, bool nulls,
+ 					 int natts, Form_pg_attribute *att,
+ 					 Datum *values, bool *allnulls, bool *hasnulls);
+ 
+ 
+ /*
+  * Generate an internal-style tuple descriptor to pass to minmax_form_tuple.
+  * These have no use outside this module.
+  *
+  * The argument is a minmax index's regular tuple descriptor.
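+  *
+  * For example, for an index on columns (a, b), the returned descriptor has
+  * four attributes, in this order: min(a), max(a), min(b), max(b).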
+  */
+ TupleDesc
+ minmax_get_descr(TupleDesc tupdesc)
+ {
+ 	TupleDesc	diskDesc;
+ 	int			i,
+ 				j;
+ 
+ 	diskDesc = CreateTemplateTupleDesc(tupdesc->natts * 2, false);
+ 
+ 	for (i = 0, j = 1; i < tupdesc->natts; i++)
+ 	{
+ 		/* min */
+ 		TupleDescInitEntry(diskDesc,
+ 						   j++,
+ 						   NULL,
+ 						   tupdesc->attrs[i]->atttypid,
+ 						   tupdesc->attrs[i]->atttypmod,
+ 						   0);
+ 		/* max */
+ 		TupleDescInitEntry(diskDesc,
+ 						   j++,
+ 						   NULL,
+ 						   tupdesc->attrs[i]->atttypid,
+ 						   tupdesc->attrs[i]->atttypmod,
+ 						   0);
+ 	}
+ 
+ 	return diskDesc;
+ }
+ 
+ /*
+  * Generate a new on-disk tuple to be inserted in a minmax index.
+  *
+  * The first tuple descriptor passed corresponds to the catalogued index info,
+  * that is, it is the index's descriptor; the second descriptor must be
+  * obtained by calling minmax_get_descr() on that descriptor.
+  *
+  * (The reason for this slightly grotty arrangement is that we use heap tuple
+  * functions to implement packing of a tuple into the on-disk format.)
+  */
+ MMTuple *
+ minmax_form_tuple(TupleDesc idxDsc, TupleDesc diskDsc, DeformedMMTuple *tuple,
+ 				  Size *size)
+ {
+ 	Datum	   *values;
+ 	bool	   *nulls;
+ 	bool		anynulls = false;
+ 	MMTuple    *rettuple;
+ 	int			keyno;
+ 	uint16		phony_infomask;
+ 	bits8	   *phony_nullbitmap;
+ 	Size		len,
+ 				hoff,
+ 				data_len;
+ 
+ 	Assert(diskDsc->natts > 0);
+ 
+ 	values = palloc(sizeof(Datum) * diskDsc->natts);
+ 	nulls = palloc0(sizeof(bool) * diskDsc->natts);
+ 	phony_nullbitmap = palloc(sizeof(bits8) * BITMAPLEN(diskDsc->natts));
+ 
+ 	/*
+ 	 * Set up the values/nulls arrays for heap_fill_tuple
+ 	 */
+ 	for (keyno = 0; keyno < idxDsc->natts; keyno++)
+ 	{
+ 		int		idxattno = keyno * 2;
+ 
+ 		/*
+ 		 * "allnulls" is set when there's no nonnull value in any row in
+ 		 * the column; set the nullable bits for both min and max attrs.
+ 		 */
+ 		if (tuple->values[keyno].allnulls)
+ 		{
+ 			nulls[idxattno] = true;
+ 			nulls[idxattno + 1] = true;
+ 			anynulls = true;
+ 			continue;
+ 		}
+ 
+ 		if (tuple->values[keyno].hasnulls)
+ 			anynulls = true;
+ 
+ 		values[idxattno] = tuple->values[keyno].min;
+ 		values[idxattno + 1] = tuple->values[keyno].max;
+ 	}
+ 
+ 	/* compute total space needed */
+ 	len = SizeOfMinMaxTuple;
+ 	if (anynulls)
+ 	{
+ 		/*
+ 		 * We need a double-length bitmap on an on-disk minmax index tuple;
+ 		 * the first half stores the "allnulls" bits, the second stores
+ 		 * "hasnulls".
+ 		 */
+ 		len += BITMAPLEN(idxDsc->natts * 2);
+ 	}
+ 
+ 	/*
+ 	 * TODO: we can probably do away with alignment here, and save some
+ 	 * precious disk space.  When there's no bitmap we can save 6 bytes. Maybe
+ 	 * we can use the first col's type alignment instead of maxalign.
+ 	 */
+ 	len = hoff = MAXALIGN(len);
+ 
+ 	data_len = heap_compute_data_size(diskDsc, values, nulls);
+ 
+ 	len += data_len;
+ 
+ 	rettuple = palloc0(len);
+ 	rettuple->mt_info = hoff;
+ 	Assert((rettuple->mt_info & MMIDX_OFFSET_MASK) == hoff);
+ 
+ 	/*
+ 	 * The infomask and null bitmap as computed by heap_fill_tuple are useless
+ 	 * to us.  However, that function will not accept a null infomask; and we
+ 	 * need to pass a valid null bitmap so that it will correctly skip
+ 	 * outputting null attributes in the data area.
+ 	 */
+ 	heap_fill_tuple(diskDsc,
+ 					values,
+ 					nulls,
+ 					(char *) rettuple + hoff,
+ 					data_len,
+ 					&phony_infomask,
+ 					phony_nullbitmap);
+ 
+ 	/* done with these */
+ 	pfree(values);
+ 	pfree(nulls);
+ 	pfree(phony_nullbitmap);
+ 
+ 	/*
+ 	 * Now fill in the real null bitmasks.	allnulls first.
+ 	 */
+ 	if (anynulls)
+ 	{
+ 		bits8	   *bitP;
+ 		int			bitmask;
+ 
+ 		rettuple->mt_info |= MMIDX_NULLS_MASK;
+ 
+ 		bitP = ((bits8 *) ((char *) rettuple + SizeOfMinMaxTuple)) - 1;
+ 		bitmask = HIGHBIT;
+ 		for (keyno = 0; keyno < idxDsc->natts; keyno++)
+ 		{
+ 			if (bitmask != HIGHBIT)
+ 				bitmask <<= 1;
+ 			else
+ 			{
+ 				bitP += 1;
+ 				*bitP = 0x0;
+ 				bitmask = 1;
+ 			}
+ 
+ 			if (tuple->values[keyno].allnulls)
+ 				continue;
+ 
+ 			*bitP |= bitmask;
+ 		}
+ 		/* hasnulls bits follow */
+ 		for (keyno = 0; keyno < idxDsc->natts; keyno++)
+ 		{
+ 			if (bitmask != HIGHBIT)
+ 				bitmask <<= 1;
+ 			else
+ 			{
+ 				bitP += 1;
+ 				*bitP = 0x0;
+ 				bitmask = 1;
+ 			}
+ 
+ 			if (tuple->values[keyno].hasnulls)
+ 				continue;
+ 
+ 			*bitP |= bitmask;
+ 		}
+ 	}
+ 
+ 	*size = len;
+ 	return rettuple;
+ }
+ 
+ /*
+  * Free a tuple created by minmax_form_tuple
+  */
+ void
+ minmax_free_tuple(MMTuple *tuple)
+ {
+ 	pfree(tuple);
+ }
+ 
+ /*
+  * Convert a MMTuple back to a DeformedMMTuple.  This is the reverse of
+  * minmax_form_tuple.
+  *
+  * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
+  * deconstruct the tuple from the on-disk format.
+  *
+  * XXX some callers might need copies of each datum; if so we need
+  * to apply datumCopy inside the loop.	We probably also need a
+  * minmax_free_dtuple() function.
+  */
+ DeformedMMTuple *
+ minmax_deform_tuple(TupleDesc tupdesc, MMTuple *tuple)
+ {
+ 	DeformedMMTuple *dtup;
+ 	Datum	   *values;
+ 	bool	   *allnulls;
+ 	bool	   *hasnulls;
+ 	char	   *tp;
+ 	bits8	   *nullbits = NULL;
+ 	int			keyno;
+ 
+ 	dtup = palloc(offsetof(DeformedMMTuple, values) +
+ 				  sizeof(MMValues) * tupdesc->natts);
+ 
+ 	values = palloc(sizeof(Datum) * tupdesc->natts * 2);
+ 	allnulls = palloc(sizeof(bool) * tupdesc->natts);
+ 	hasnulls = palloc(sizeof(bool) * tupdesc->natts);
+ 
+ 	tp = (char *) tuple + MMTupleDataOffset(tuple);
+ 
+ 	if (MMTupleHasNulls(tuple))
+ 		nullbits = (bits8 *) ((char *) tuple + SizeOfMinMaxTuple);
+ 	mm_deconstruct_tuple(tp, nullbits,
+ 						 MMTupleHasNulls(tuple),
+ 						 tupdesc->natts, tupdesc->attrs, values,
+ 						 allnulls, hasnulls);
+ 
+ 	for (keyno = 0; keyno < tupdesc->natts; keyno++)
+ 	{
+ 		if (allnulls[keyno])
+ 		{
+ 			dtup->values[keyno].allnulls = true;
+ 			continue;
+ 		}
+ 
+ 		/* XXX optional datumCopy() */
+ 		dtup->values[keyno].min = values[keyno * 2];
+ 		dtup->values[keyno].max = values[keyno * 2 + 1];
+ 		dtup->values[keyno].hasnulls = hasnulls[keyno];
+ 		dtup->values[keyno].allnulls = false;
+ 	}
+ 
+ 	pfree(values);
+ 	pfree(allnulls);
+ 	pfree(hasnulls);
+ 
+ 	return dtup;
+ }
+ 
+ /*
+  * mm_deconstruct_tuple
+  *		Guts of attribute extraction from an on-disk minmax tuple.
+  *
+  * Its arguments are:
+  *	tp			pointer to the tuple data area
+  *	nullbits	pointer to the tuple nulls bitmask
+  *	nulls		"has nulls" bit in tuple infomask
+  *	natts		number of array members in att
+  *	att			the tuple's TupleDesc Form_pg_attribute array
+  *	values		output values, size 2 * natts (alternates min and max)
+  *	allnulls	output "allnulls", size natts
+  *	hasnulls	output "hasnulls", size natts
+  *
+  * Output arrays are allocated by caller.
+  */
+ static inline void
+ mm_deconstruct_tuple(char *tp, bits8 *nullbits, bool nulls,
+ 					 int natts, Form_pg_attribute *att,
+ 					 Datum *values, bool *allnulls, bool *hasnulls)
+ {
+ 	int			attnum;
+ 	long		off = 0;
+ 
+ 	/*
+ 	 * First iterate to natts to obtain both null flags for each attribute.
+ 	 */
+ 	for (attnum = 0; attnum < natts; attnum++)
+ 	{
+ 		/*
+ 		 * the "all nulls" bit means that all values in the page range for
+ 		 * this column are nulls.  Therefore there are no values in the tuple
+ 		 * data area.
+ 		 */
+ 		if (nulls && att_isnull(attnum, nullbits))
+ 		{
+ 			values[attnum] = (Datum) 0;
+ 			allnulls[attnum] = true;
+ 			hasnulls[attnum] = true;	/* XXX ? */
+ 			continue;
+ 		}
+ 
+ 		allnulls[attnum] = false;
+ 
+ 		/*
+ 		 * the "has nulls" bit means that some tuples have nulls, but others
+ 		 * have not-null values.  So the tuple data does have data for this
+ 		 * column.
+ 		 *
+ 		 * The hasnulls bits follow the allnulls bits in the same bitmask.
+ 		 */
+ 		hasnulls[attnum] = nulls && att_isnull(natts + attnum, nullbits);
+ 	}
+ 
+ 	/*
+ 	 * Then we iterate to natts * 2 to obtain each attribute's min and max
+ 	 * values.	Note that since we reuse attribute entries (first for the
+ 	 * minimum value of the corresponding column, then for max), we cannot
+ 	 * cache offsets here.
+ 	 */
+ 	for (attnum = 0; attnum < natts * 2; attnum++)
+ 	{
+ 		int			true_attnum = attnum / 2;
+ 		Form_pg_attribute thisatt = att[true_attnum];
+ 
+ 		if (allnulls[true_attnum])
+ 			continue;
+ 
+ 		if (thisatt->attlen == -1)
+ 		{
+ 			off = att_align_pointer(off, thisatt->attalign, -1,
+ 									tp + off);
+ 		}
+ 		else
+ 		{
+ 			/* not varlena, so safe to use att_align_nominal */
+ 			off = att_align_nominal(off, thisatt->attalign);
+ 		}
+ 
+ 		values[attnum] = fetchatt(thisatt, tp + off);
+ 
+ 		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+ 	}
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmxlog.c
***************
*** 0 ****
--- 1,304 ----
+ /*
+  * mmxlog.c
+  *		XLog replay routines for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmxlog.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_tuple.h"
+ #include "access/minmax_xlog.h"
+ #include "access/xlogutils.h"
+ #include "storage/freespace.h"
+ 
+ 
+ /*
+  * xlog replay routines
+  */
+ static void
+ minmax_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_createidx *xlrec = (xl_minmax_createidx *) XLogRecGetData(record);
+ 	Buffer		buf;
+ 	Page		page;
+ 
+ 	/* Backup blocks are not used in create_index records */
+ 	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ 
+ 	/* create the index's metapage */
+ 	buf = XLogReadBuffer(xlrec->node, MINMAX_METAPAGE_BLKNO, true);
+ 	Assert(BufferIsValid(buf));
+ 	page = (Page) BufferGetPage(buf);
+ 	mm_metapage_init(page);
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buf);
+ 	UnlockReleaseBuffer(buf);
+ 
+ 	/* also initialize its first revmap page */
+ 	buf = XLogReadBuffer(xlrec->node, 1, true);
+ 	Assert(BufferIsValid(buf));
+ 	page = (Page) BufferGetPage(buf);
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buf);
+ 	UnlockReleaseBuffer(buf);
+ }
+ 
+ static void
+ minmax_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_insert *xlrec = (xl_minmax_insert *) XLogRecGetData(record);
+ 	BlockNumber	blkno;
+ 	Buffer		buffer;
+ 	Page		page;
+ 	OffsetNumber offnum;
+ 	int			tuplen;
+ 	MMTuple	   *mmtuple;
+ 
+ 	/* If we have a full-page image, restore it and we're done */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid));
+ 	if (record->xl_info & XLOG_MINMAX_INIT_PAGE)
+ 	{
+ 		buffer = XLogReadBuffer(xlrec->target.node, blkno, true);
+ 		Assert(BufferIsValid(buffer));
+ 		page = (Page) BufferGetPage(buffer);
+ 
+ 		mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
+ 	}
+ 	else
+ 	{
+ 		buffer = XLogReadBuffer(xlrec->target.node, blkno, false);
+ 		if (!BufferIsValid(buffer))
+ 			return;
+ 		page = (Page) BufferGetPage(buffer);
+ 
+ 		if (lsn <= PageGetLSN(page))	/* changes are applied */
+ 		{
+ 			UnlockReleaseBuffer(buffer);
+ 			return;
+ 		}
+ 	}
+ 	offnum = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
+ 	if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ 		elog(PANIC, "minmax_xlog_insert: invalid max offset number");
+ 
+ 	tuplen = record->xl_len - SizeOfMinmaxInsert;
+ 	mmtuple = (MMTuple *) ((char *) xlrec + SizeOfMinmaxInsert);
+ 
+ 	if (xlrec->overwrite)
+ 		PageOverwriteItemData(page, offnum, (Item) mmtuple, tuplen);
+ 	else
+ 	{
+ 		offnum = PageAddItem(page, (Item) mmtuple, tuplen, offnum, true, false);
+ 		if (offnum == InvalidOffsetNumber)
+ 			elog(PANIC, "minmax_xlog_insert: failed to add tuple");
+ 	}
+ 
+ 	PageSetLSN(page, lsn);
+ 
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ 
+ 	/* XXX no FSM updates here ... */
+ }
+ 
+ static void
+ minmax_xlog_bulkremove(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_bulkremove *xlrec = (xl_minmax_bulkremove *) XLogRecGetData(record);
+ 	Buffer		buffer;
+ 	Page		page;
+ 	OffsetNumber *offnos;
+ 	int			noffs;
+ 	Size		freespace;
+ 
+ 	/* If we have a full-page image, restore it and we're done */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ 	if (!BufferIsValid(buffer))
+ 		return;
+ 	page = (Page) BufferGetPage(buffer);
+ 
+ 	if (lsn <= PageGetLSN(page))	/* changes are applied */
+ 	{
+ 		UnlockReleaseBuffer(buffer);
+ 		return;
+ 	}
+ 
+ 	offnos = (OffsetNumber *) ((char *) xlrec + SizeOfMinmaxBulkRemove);
+ 	noffs = (record->xl_len - SizeOfMinmaxBulkRemove) / sizeof(OffsetNumber);
+ 
+ 	PageIndexDeleteNoCompact(page, offnos, noffs);
+ 	freespace = PageGetFreeSpace(page);
+ 
+ 	PageSetLSN(page, lsn);
+ 
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ 
+ 	/* update FSM as well */
+ 	XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
+ }
+ 
+ static void
+ minmax_xlog_revmap_set(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_rm_set *xlrec = (xl_minmax_rm_set *) XLogRecGetData(record);
+ 	bool	init;
+ 	Buffer	buffer;
+ 	Page	page;
+ 
+ 	/* If we have a full-page image, restore it and we're done */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	init = (record->xl_info & XLOG_MINMAX_INIT_PAGE) != 0;
+ 	buffer = XLogReadBuffer(xlrec->node, xlrec->mapBlock, init);
+ 	Assert(BufferIsValid(buffer));
+ 	page = BufferGetPage(buffer);
+ 	if (init)
+ 		mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 
+ 	rm_page_set_iptr(page, xlrec->pagesPerRange, xlrec->heapBlock,
+ 					 ItemPointerGetBlockNumber(&(xlrec->newval)),
+ 					 ItemPointerGetOffsetNumber(&(xlrec->newval)));
+ 
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ }
+ 
+ static void
+ minmax_xlog_metapg_set(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_metapg_set *xlrec = (xl_minmax_metapg_set *) XLogRecGetData(record);
+ 	Buffer	meta;
+ 	Page	metapg;
+ 	MinmaxMetaPageData *metadata;
+ 
+ 	/* If we have a full-page image, restore it and we're done */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	meta = XLogReadBuffer(xlrec->node, MINMAX_METAPAGE_BLKNO, false);
+ 	Assert(BufferIsValid(meta));
+ 
+ 	metapg = BufferGetPage(meta);
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(metapg);
+ 	metadata->revmapArrayPages[xlrec->blkidx] = xlrec->newpg;
+ 
+ 	PageSetLSN(metapg, lsn);
+ 	MarkBufferDirty(meta);
+ 	UnlockReleaseBuffer(meta);
+ }
+ 
+ static void
+ minmax_xlog_init_rmpg(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_init_rmpg *xlrec = (xl_minmax_init_rmpg *) XLogRecGetData(record);
+ 	Buffer		buffer;
+ 
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
+ 	Assert(BufferIsValid(buffer));
+ 
+ 	if (xlrec->array)
+ 		initialize_rma_page(buffer);
+ 	else
+ 		initialize_rmr_page(buffer, xlrec->logblk);
+ 
+ 	PageSetLSN(BufferGetPage(buffer), lsn);
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ }
+ 
+ static void
+ minmax_xlog_rmarray_set(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_rmarray_set *xlrec = (xl_minmax_rmarray_set *) XLogRecGetData(record);
+ 	Buffer	buffer;
+ 	Page	page;
+ 	RevmapArrayContents *contents;
+ 
+ 	/* If we have a full-page image, restore it and we're done */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 		return;
+ 	}
+ 
+ 	buffer = XLogReadBuffer(xlrec->node, xlrec->rmarray, false);
+ 	Assert(BufferIsValid(buffer));
+ 
+ 	page = BufferGetPage(buffer);
+ 
+ 	contents = (RevmapArrayContents *) PageGetContents(page);
+ 	contents->rma_blocks[xlrec->blkidx] = xlrec->newpg;
+ 	contents->rma_nblocks = xlrec->blkidx + 1;	/* XXX is this okay? */
+ 
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ }
+ 
+ void
+ minmax_redo(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+ 
+ 	switch (info & XLOG_MINMAX_OPMASK)
+ 	{
+ 		case XLOG_MINMAX_CREATE_INDEX:
+ 			minmax_xlog_createidx(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_INSERT:
+ 			minmax_xlog_insert(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_BULKREMOVE:
+ 			minmax_xlog_bulkremove(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_REVMAP_SET:
+ 			minmax_xlog_revmap_set(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_METAPG_SET:
+ 			minmax_xlog_metapg_set(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_RMARRAY_SET:
+ 			minmax_xlog_rmarray_set(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_INIT_RMPG:
+ 			minmax_xlog_init_rmpg(lsn, record);
+ 			break;
+ 		default:
+ 			elog(PANIC, "minmax_redo: unknown op code %u", info);
+ 	}
+ }
*** a/src/backend/access/rmgrdesc/Makefile
--- b/src/backend/access/rmgrdesc/Makefile
***************
*** 9,15 **** top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
  OBJS = clogdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \
! 	   mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o smgrdesc.o spgdesc.o \
  	   standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 9,16 ----
  include $(top_builddir)/src/Makefile.global
  
  OBJS = clogdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \
! 	   minmaxdesc.o mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o \
! 	   smgrdesc.o spgdesc.o \
  	   standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
  
  include $(top_srcdir)/src/backend/common.mk
*** /dev/null
--- b/src/backend/access/rmgrdesc/minmaxdesc.c
***************
*** 0 ****
--- 1,95 ----
+ /*-------------------------------------------------------------------------
+  *
+  * minmaxdesc.c
+  *	  rmgr descriptor routines for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/rmgrdesc/minmaxdesc.c
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax_xlog.h"
+ 
+ static void
+ out_target(StringInfo buf, xl_minmax_tid *target)
+ {
+ 	appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
+ 			 target->node.spcNode, target->node.dbNode, target->node.relNode,
+ 					 ItemPointerGetBlockNumber(&(target->tid)),
+ 					 ItemPointerGetOffsetNumber(&(target->tid)));
+ }
+ 
+ void
+ minmax_desc(StringInfo buf, uint8 xl_info, char *rec)
+ {
+ 	uint8		info = xl_info & ~XLR_INFO_MASK;
+ 
+ 	info &= XLOG_MINMAX_OPMASK;
+ 	if (info == XLOG_MINMAX_CREATE_INDEX)
+ 	{
+ 		xl_minmax_createidx *xlrec = (xl_minmax_createidx *) rec;
+ 
+ 		appendStringInfo(buf, "create index: %u/%u/%u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode);
+ 	}
+ 	else if (info == XLOG_MINMAX_INSERT)
+ 	{
+ 		xl_minmax_insert *xlrec = (xl_minmax_insert *) rec;
+ 
+ 		if (xl_info & XLOG_MINMAX_INIT_PAGE)
+ 			appendStringInfoString(buf, "insert(init): ");
+ 		else
+ 			appendStringInfoString(buf, "insert: ");
+ 		out_target(buf, &(xlrec->target));
+ 	}
+ 	else if (info == XLOG_MINMAX_BULKREMOVE)
+ 	{
+ 		xl_minmax_bulkremove *xlrec = (xl_minmax_bulkremove *) rec;
+ 
+ 		appendStringInfo(buf, "bulkremove: rel %u/%u/%u blk %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode, xlrec->block);
+ 	}
+ 	else if (info == XLOG_MINMAX_REVMAP_SET)
+ 	{
+ 		xl_minmax_rm_set *xlrec = (xl_minmax_rm_set *) rec;
+ 
+ 		appendStringInfo(buf, "revmap set: rel %u/%u/%u mapblk %u pagesPerRange %u item %u value %u/%u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode, xlrec->mapBlock,
+ 						 xlrec->pagesPerRange, xlrec->heapBlock,
+ 						 ItemPointerGetBlockNumber(&(xlrec->newval)),
+ 						 ItemPointerGetOffsetNumber(&(xlrec->newval)));
+ 	}
+ 	else if (info == XLOG_MINMAX_METAPG_SET)
+ 	{
+ 		xl_minmax_metapg_set *xlrec = (xl_minmax_metapg_set *) rec;
+ 
+ 		appendStringInfo(buf, "metapg: rel %u/%u/%u array revmap idx %d block %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode,
+ 						 xlrec->blkidx, xlrec->newpg);
+ 	}
+ 	else if (info == XLOG_MINMAX_RMARRAY_SET)
+ 	{
+ 		xl_minmax_rmarray_set *xlrec = (xl_minmax_rmarray_set *) rec;
+ 
+ 		appendStringInfoString(buf, "revmap array: ");
+ 		appendStringInfo(buf, "rel %u/%u/%u array pg %u revmap idx %d block %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode,
+ 						 xlrec->rmarray,
+ 						 xlrec->blkidx, xlrec->newpg);
+ 	}
+ 	else
+ 		appendStringInfoString(buf, "UNKNOWN");
+ }
+ 
*** a/src/backend/access/transam/rmgr.c
--- b/src/backend/access/transam/rmgr.c
***************
*** 12,17 ****
--- 12,18 ----
  #include "access/gist_private.h"
  #include "access/hash.h"
  #include "access/heapam_xlog.h"
+ #include "access/minmax_xlog.h"
  #include "access/multixact.h"
  #include "access/nbtree.h"
  #include "access/spgist.h"
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 2096,2101 **** IndexBuildHeapScan(Relation heapRelation,
--- 2096,2122 ----
  				   IndexBuildCallback callback,
  				   void *callback_state)
  {
+ 	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
+ 								   indexInfo, allow_sync,
+ 								   0, InvalidBlockNumber,
+ 								   callback, callback_state);
+ }
+ 
+ /*
+  * As above, except that instead of scanning the complete heap, only the given
+  * number of blocks, starting at start_blockno, are scanned.  A scan to the end
+  * of the relation can be requested by passing InvalidBlockNumber as numblocks.
+  */
+ double
+ IndexBuildHeapRangeScan(Relation heapRelation,
+ 						Relation indexRelation,
+ 						IndexInfo *indexInfo,
+ 						bool allow_sync,
+ 						BlockNumber start_blockno,
+ 						BlockNumber numblocks,
+ 						IndexBuildCallback callback,
+ 						void *callback_state)
+ {
  	bool		is_system_catalog;
  	bool		checking_uniqueness;
  	HeapScanDesc scan;
***************
*** 2166,2171 **** IndexBuildHeapScan(Relation heapRelation,
--- 2187,2195 ----
  								true,	/* buffer access strategy OK */
  								allow_sync);	/* syncscan OK? */
  
+ 	/* set our endpoints */
+ 	heap_setscanlimits(scan, start_blockno, numblocks);
+ 
  	reltuples = 0;
  
  	/*
*** a/src/backend/replication/logical/decode.c
--- b/src/backend/replication/logical/decode.c
***************
*** 132,137 **** LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
--- 132,138 ----
  		case RM_GIST_ID:
  		case RM_SEQ_ID:
  		case RM_SPGIST_ID:
+ 		case RM_MINMAX_ID:
  			break;
  		case RM_NEXT_ID:
  			elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 324,329 **** PageAddItem(Page page,
--- 324,364 ----
  }
  
  /*
+  * PageOverwriteItemData
+  * 		Overwrite the data for the item at the given offset.
+  *
+  * The new data must fit in the existing data space for the old tuple.
+  */
+ void
+ PageOverwriteItemData(Page page, OffsetNumber offset, Item item, Size size)
+ {
+ 	PageHeader	phdr = (PageHeader) page;
+ 	ItemId		itemId;
+ 
+ 	/*
+ 	 * Be wary about corrupted page pointers
+ 	 */
+ 	if (phdr->pd_lower < SizeOfPageHeaderData ||
+ 		phdr->pd_lower > phdr->pd_upper ||
+ 		phdr->pd_upper > phdr->pd_special ||
+ 		phdr->pd_special > BLCKSZ)
+ 		ereport(PANIC,
+ 				(errcode(ERRCODE_DATA_CORRUPTED),
+ 				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
+ 						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
+ 
+ 	itemId = PageGetItemId(phdr, offset);
+ 	if (!ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId))
+ 		elog(ERROR, "existing item to overwrite is not used");
+ 
+ 	if (ItemIdGetLength(itemId) < size)
+ 		elog(ERROR, "existing item is not large enough to be overwritten");
+ 
+ 	memcpy((char *) page + ItemIdGetOffset(itemId), item, size);
+ 	ItemIdSetNormal(itemId, ItemIdGetOffset(itemId), size);
+ }
+ 
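+ /*
+  * For context: minmax WAL replay (minmax_xlog_insert in mmxlog.c) uses this
+  * when xlrec->overwrite is set; overwriting in place keeps the item's TID
+  * stable, which the minmax reverse range map relies on.
+  */
+ 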
+ /*
   * PageGetTempPage
   *		Get a temporary page in local memory for special processing.
   *		The returned page is not initialized at all; caller must do that.
***************
*** 399,405 **** PageRestoreTempPage(Page tempPage, Page oldPage)
  }
  
  /*
!  * sorting support for PageRepairFragmentation and PageIndexMultiDelete
   */
  typedef struct itemIdSortData
  {
--- 434,441 ----
  }
  
  /*
!  * sorting support for PageRepairFragmentation, PageIndexMultiDelete,
!  * PageIndexDeleteNoCompact
   */
  typedef struct itemIdSortData
  {
***************
*** 896,901 **** PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
--- 932,1113 ----
  	phdr->pd_upper = upper;
  }
  
+ /*
+  * PageIndexDeleteNoCompact
+  *		Delete the given items for an index page, and defragment the resulting
+  *		free space, but do not compact the item pointers array.
+  *
+  * itemnos is the array of offset numbers of the items to delete; nitems is
+  * its size.
+  *
+  * Unused line pointers are kept in place (merely marked unused), so that
+  * the offset numbers of remaining live items do not change.
+  *
+  * This is used for index AMs that require that existing TIDs of live tuples
+  * remain unchanged.
+  */
+ void
+ PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos, int nitems)
+ {
+ 	PageHeader	phdr = (PageHeader) page;
+ 	LocationIndex pd_lower = phdr->pd_lower;
+ 	LocationIndex pd_upper = phdr->pd_upper;
+ 	LocationIndex pd_special = phdr->pd_special;
+ 	int			nline;
+ 	bool		empty;
+ 	OffsetNumber offnum;
+ 	int			nextitm;
+ 
+ 	/*
+ 	 * As with PageRepairFragmentation, paranoia seems justified.
+ 	 */
+ 	if (pd_lower < SizeOfPageHeaderData ||
+ 		pd_lower > pd_upper ||
+ 		pd_upper > pd_special ||
+ 		pd_special > BLCKSZ ||
+ 		pd_special != MAXALIGN(pd_special))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_DATA_CORRUPTED),
+ 				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
+ 						pd_lower, pd_upper, pd_special)));
+ 
+ 	/*
+ 	 * Scan the existing item pointer array and mark as unused those that are
+ 	 * in our kill-list; make sure any non-interesting ones are marked unused
+ 	 * as well.
+ 	 */
+ 	nline = PageGetMaxOffsetNumber(page);
+ 	empty = true;
+ 	nextitm = 0;
+ 	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
+ 	{
+ 		ItemId		lp;
+ 		ItemLength	itemlen;
+ 		ItemOffset	offset;
+ 
+ 		lp = PageGetItemId(page, offnum);
+ 
+ 		itemlen = ItemIdGetLength(lp);
+ 		offset = ItemIdGetOffset(lp);
+ 
+ 		if (ItemIdIsUsed(lp))
+ 		{
+ 			if (offset < pd_upper ||
+ 				(offset + itemlen) > pd_special ||
+ 				offset != MAXALIGN(offset))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_DATA_CORRUPTED),
+ 						 errmsg("corrupted item pointer: offset = %u, length = %u",
+ 								offset, (unsigned int) itemlen)));
+ 
+ 			if (nextitm < nitems && offnum == itemnos[nextitm])
+ 			{
+ 				/* this one is on our list to delete, so mark it unused */
+ 				ItemIdSetUnused(lp);
+ 				nextitm++;
+ 			}
+ 			else if (ItemIdHasStorage(lp))
+ 			{
+ 				/* This one's live -- must do the compaction dance */
+ 				empty = false;
+ 			}
+ 			else
+ 			{
+ 				/* get rid of this one too */
+ 				ItemIdSetUnused(lp);
+ 			}
+ 		}
+ 	}
+ 
+ 	/* this will catch invalid or out-of-order itemnos[] */
+ 	if (nextitm != nitems)
+ 		elog(ERROR, "incorrect index offsets supplied");
+ 
+ 	if (empty)
+ 	{
+ 		/* Page is completely empty, so just reset it quickly */
+ 		phdr->pd_lower = SizeOfPageHeaderData;
+ 		phdr->pd_upper = pd_special;
+ 	}
+ 	else
+ 	{
+ 		/* There are live items: need to compact the page the hard way */
+ 		itemIdSortData itemidbase[MaxOffsetNumber];
+ 		itemIdSort	itemidptr;
+ 		int			i;
+ 		Size		totallen;
+ 		Offset		upper;
+ 
+ 		/*
+ 		 * Scan the page taking note of each item that we need to preserve.
+ 		 * This includes both live items (those that contain data) and
+ 		 * interspersed unused ones.  It's critical to preserve these unused
+ 		 * items, because otherwise the offset numbers for later live items
+ 		 * would change, which is not acceptable.  Unused items might get used
+ 		 * again later; that is fine.
+ 		 */
+ 		itemidptr = itemidbase;
+ 		totallen = 0;
+ 		for (i = 0; i < nline; i++, itemidptr++)
+ 		{
+ 			ItemId		lp;
+ 
+ 			itemidptr->offsetindex = i;
+ 
+ 			lp = PageGetItemId(page, i + 1);
+ 			if (ItemIdHasStorage(lp))
+ 			{
+ 				itemidptr->itemoff = ItemIdGetOffset(lp);
+ 				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
+ 				totallen += itemidptr->alignedlen;
+ 			}
+ 			else
+ 			{
+ 				itemidptr->itemoff = 0;
+ 				itemidptr->alignedlen = 0;
+ 			}
+ 		}
+ 		/* By here, there are exactly nline elements in itemidbase array */
+ 
+ 		if (totallen > (Size) (pd_special - pd_lower))
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_DATA_CORRUPTED),
+ 					 errmsg("corrupted item lengths: total %u, available space %u",
+ 							(unsigned int) totallen, pd_special - pd_lower)));
+ 
+ 		/* sort itemIdSortData array into decreasing itemoff order */
+ 		qsort((char *) itemidbase, nline, sizeof(itemIdSortData),
+ 			  itemoffcompare);
+ 
+ 		/*
+ 		 * Defragment the data areas of each tuple, being careful to preserve
+ 		 * each item's position in the linp array.
+ 		 */
+ 		upper = pd_special;
+ 		PageClearHasFreeLinePointers(page);
+ 		for (i = 0, itemidptr = itemidbase; i < nline; i++, itemidptr++)
+ 		{
+ 			ItemId		lp;
+ 
+ 			lp = PageGetItemId(page, itemidptr->offsetindex + 1);
+ 			if (itemidptr->alignedlen == 0)
+ 			{
+ 				PageSetHasFreeLinePointers(page);
+ 				ItemIdSetUnused(lp);
+ 				continue;
+ 			}
+ 			upper -= itemidptr->alignedlen;
+ 			memmove((char *) page + upper,
+ 					(char *) page + itemidptr->itemoff,
+ 					itemidptr->alignedlen);
+ 			lp->lp_off = upper;
+ 			/* lp_flags and lp_len remain the same as originally */
+ 		}
+ 
+ 		/* Set the new page limits */
+ 		phdr->pd_upper = upper;
+ 		phdr->pd_lower = SizeOfPageHeaderData + i * sizeof(ItemIdData);
+ 	}
+ }
  
  /*
   * Set checksum for a page in shared buffers.
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 7349,7351 **** gincostestimate(PG_FUNCTION_ARGS)
--- 7349,7376 ----
  
  	PG_RETURN_VOID();
  }
+ 
+ Datum
+ mmcostestimate(PG_FUNCTION_ARGS)
+ {
+ 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ 	IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
+ 	double		loop_count = PG_GETARG_FLOAT8(2);
+ 	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+ 	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+ 	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+ 	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ 	IndexOptInfo *index = path->indexinfo;
+ 
+ 	*indexStartupCost = (Cost) seq_page_cost * index->pages * loop_count;
+ 	*indexTotalCost = *indexStartupCost;
+ 
+ 	*indexSelectivity =
+ 		clauselist_selectivity(root, path->indexquals,
+ 							   path->indexinfo->rel->relid,
+ 							   JOIN_INNER, NULL);
+ 	*indexCorrelation = 1;
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
*** a/src/backend/utils/mmgr/mcxt.c
--- b/src/backend/utils/mmgr/mcxt.c
***************
*** 68,74 **** static void MemoryContextStatsInternal(MemoryContext context, int level);
   */
  #define AssertNotInCriticalSection(context) \
  	Assert(CritSectionCount == 0 || (context) == ErrorContext || \
! 		   AmCheckpointerProcess())
  
  /*****************************************************************************
   *	  EXPORTED ROUTINES														 *
--- 68,74 ----
   */
  #define AssertNotInCriticalSection(context) \
  	Assert(CritSectionCount == 0 || (context) == ErrorContext || \
! 		   AmCheckpointerProcess() || true)		/* XXX "|| true" disables this Assert */
  
  /*****************************************************************************
   *	  EXPORTED ROUTINES														 *
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 112,117 **** extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
--- 112,119 ----
  					 bool allow_strat, bool allow_sync);
  extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
  				  int nkeys, ScanKey key);
+ extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
+ 		   BlockNumber numBlks);
  extern void heap_rescan(HeapScanDesc scan, ScanKey key);
  extern void heap_endscan(HeapScanDesc scan);
  extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
*** /dev/null
--- b/src/include/access/minmax.h
***************
*** 0 ****
--- 1,52 ----
+ /*
+  * AM-callable functions for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax.h
+  */
+ #ifndef MINMAX_H
+ #define MINMAX_H
+ 
+ #include "fmgr.h"
+ #include "nodes/execnodes.h"
+ #include "utils/relcache.h"
+ 
+ 
+ /*
+  * prototypes for functions in minmax.c (external entry points for minmax)
+  */
+ extern Datum mmbuild(PG_FUNCTION_ARGS);
+ extern Datum mmbuildempty(PG_FUNCTION_ARGS);
+ extern Datum mminsert(PG_FUNCTION_ARGS);
+ extern Datum mmbeginscan(PG_FUNCTION_ARGS);
+ extern Datum mmgettuple(PG_FUNCTION_ARGS);
+ extern Datum mmgetbitmap(PG_FUNCTION_ARGS);
+ extern Datum mmrescan(PG_FUNCTION_ARGS);
+ extern Datum mmendscan(PG_FUNCTION_ARGS);
+ extern Datum mmmarkpos(PG_FUNCTION_ARGS);
+ extern Datum mmrestrpos(PG_FUNCTION_ARGS);
+ extern Datum mmbulkdelete(PG_FUNCTION_ARGS);
+ extern Datum mmvacuumcleanup(PG_FUNCTION_ARGS);
+ extern Datum mmcanreturn(PG_FUNCTION_ARGS);
+ extern Datum mmcostestimate(PG_FUNCTION_ARGS);
+ extern Datum mmoptions(PG_FUNCTION_ARGS);
+ 
+ /*
+  * Storage type for MinMax's reloptions
+  */
+ typedef struct MinmaxOptions
+ {
+ 	int32		vl_len_;		/* varlena header (do not touch directly!) */
+ 	int			pagesPerRange;
+ } MinmaxOptions;
+ 
+ #define MINMAX_DEFAULT_PAGES_PER_RANGE	128
+ #define MinmaxGetPagesPerRange(relation) \
+ 	((relation)->rd_options ? \
+ 	 ((MinmaxOptions *) (relation)->rd_options)->pagesPerRange : \
+ 	  MINMAX_DEFAULT_PAGES_PER_RANGE)
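+ 
+ /*
+  * For scale (illustrative, assuming BLCKSZ = 8192): with the default of 128
+  * pages per range, each index entry summarizes 128 * 8 kB = 1 MB of heap.
+  */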
+ 
+ #endif   /* MINMAX_H */
*** /dev/null
--- b/src/include/access/minmax_internal.h
***************
*** 0 ****
--- 1,37 ----
+ /*
+  * minmax_internal.h
+  *		internal declarations for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_internal.h
+  */
+ #ifndef MINMAX_INTERNAL_H
+ #define MINMAX_INTERNAL_H
+ 
+ #include "storage/buf.h"
+ #include "storage/bufpage.h"
+ #include "storage/off.h"
+ #include "utils/relcache.h"
+ 
+ 
+ extern void mm_metapage_init(Page page);
+ extern Buffer mm_getnewbuffer(Relation irel);
+ extern void rm_page_set_iptr(Page page, int pagesPerRange, BlockNumber heapBlk,
+ 				 BlockNumber blkno, OffsetNumber offno);
+ extern BlockNumber initialize_rmr_page(Buffer newbuf, BlockNumber mapBlk);
+ extern void initialize_rma_page(Buffer buf);
+ 
+ #define MINMAX_DEBUG
+ 
+ /* we allow debug if using GCC; otherwise don't bother */
+ #if defined(MINMAX_DEBUG) && defined(__GNUC__)
+ #define MINMAX_elog(level, ...)		elog(level, __VA_ARGS__)
+ #else
+ #define MINMAX_elog(level, ...)	((void) 0)
+ #endif
+ 
+ 
+ #endif   /* MINMAX_INTERNAL_H */
*** /dev/null
--- b/src/include/access/minmax_page.h
***************
*** 0 ****
--- 1,87 ----
+ /*
+  * prototypes and definitions for minmax page layouts
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_page.h
+  *
+  * NOTES
+  *
+  * These structs should really be private to specific minmax files, but it's
+  * useful to have them here so that they can be used by pageinspect and similar
+  * tools.
+  */
+ #ifndef MINMAX_PAGE_H
+ #define MINMAX_PAGE_H
+ 
+ 
+ /* special space on all minmax pages stores a "type" identifier */
+ #define		MINMAX_PAGETYPE_META			0xF091
+ #define		MINMAX_PAGETYPE_REVMAP_ARRAY	0xF092
+ #define		MINMAX_PAGETYPE_REVMAP			0xF093
+ #define		MINMAX_PAGETYPE_REGULAR			0xF094
+ 
+ typedef struct MinmaxSpecialSpace
+ {
+ 	uint16	type;
+ } MinmaxSpecialSpace;
+ 
+ /* Metapage definitions */
+ typedef struct MinmaxMetaPageData
+ {
+ 	uint32	minmaxVersion;
+ 	BlockNumber revmapArrayPages[1];	/* actually MAX_REVMAP_ARRAYPAGES */
+ } MinmaxMetaPageData;
+ 
+ /*
+  * Number of array pages listed in the metapage.  We must leave enough
+  * space for the page header, the metapage struct, and the minmax special
+  * space.
+  */
+ #define MAX_REVMAP_ARRAYPAGES	\
+ 	((BLCKSZ - \
+ 	  MAXALIGN(SizeOfPageHeaderData) - \
+ 	  offsetof(MinmaxMetaPageData, revmapArrayPages) - \
+ 	  MAXALIGN(sizeof(MinmaxSpecialSpace)) ) / \
+ 	 sizeof(BlockNumber))
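+ 
+ /*
+  * Worked example (illustrative, assuming BLCKSZ = 8192 and MAXALIGN of 8):
+  * (8192 - 24 - 4 - 8) / sizeof(BlockNumber) = 2039 array pages can be
+  * listed in the metapage.
+  */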
+ 
+ #define MINMAX_CURRENT_VERSION		1
+ 
+ #define MINMAX_METAPAGE_BLKNO	0
+ 
+ /* Definitions for regular revmap pages */
+ typedef struct RevmapContents
+ {
+ 	int32	rmr_logblk;			/* logical blkno of this revmap page */
+ 	ItemPointerData rmr_tids[1];	/* really REGULAR_REVMAP_PAGE_MAXITEMS */
+ } RevmapContents;
+ 
+ #define REGULAR_REVMAP_CONTENT_SIZE	\
+ 	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
+ 	 offsetof(RevmapContents, rmr_tids) - \
+ 	 MAXALIGN(sizeof(MinmaxSpecialSpace)))
+ /* max num of items in the array */
+ #define REGULAR_REVMAP_PAGE_MAXITEMS \
+ 	(REGULAR_REVMAP_CONTENT_SIZE / sizeof(ItemPointerData))
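+ 
+ /*
+  * Illustrative capacity, under the same assumptions as above: 8156 / 6 =
+  * 1359 item pointers per regular revmap page; at the default of 128 pages
+  * per range, one revmap page addresses roughly 1.3 GB of heap.
+  */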
+ 
+ /* Definitions for array revmap pages */
+ typedef struct RevmapArrayContents
+ {
+ 	int32	rma_nblocks;
+ 	BlockNumber	rma_blocks[1];	/* really ARRAY_REVMAP_PAGE_MAXITEMS */
+ } RevmapArrayContents;
+ 
+ #define REVMAP_ARRAY_CONTENT_SIZE \
+ 	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
+ 	 offsetof(RevmapArrayContents, rma_blocks) - \
+ 	 MAXALIGN(sizeof(MinmaxSpecialSpace)))
+ /* max num of items in the array */
+ #define ARRAY_REVMAP_PAGE_MAXITEMS \
+ 	(REVMAP_ARRAY_CONTENT_SIZE / sizeof(BlockNumber))
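+ 
+ /*
+  * Similarly illustrative: 8156 / sizeof(BlockNumber) = 2039 regular revmap
+  * page numbers fit on one array revmap page.
+  */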
+ 
+ 
+ extern void mm_page_init(Page page, uint16 type);
+ 
+ #endif		/* MINMAX_PAGE_H */
*** /dev/null
--- b/src/include/access/minmax_revmap.h
***************
*** 0 ****
--- 1,34 ----
+ /*
+  * prototypes for minmax reverse range maps
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_revmap.h
+  */
+ 
+ #ifndef MINMAX_REVMAP_H
+ #define MINMAX_REVMAP_H
+ 
+ #include "storage/block.h"
+ #include "storage/itemptr.h"
+ #include "storage/off.h"
+ #include "utils/relcache.h"
+ 
+ /* struct definition lives in mmrevmap.c */
+ typedef struct mmRevmapAccess mmRevmapAccess;
+ 
+ extern mmRevmapAccess *mmRevmapAccessInit(Relation idxrel);
+ extern void mmRevmapAccessTerminate(mmRevmapAccess *rmAccess);
+ 
+ extern void mmRevmapCreate(Relation idxrel);
+ extern void mmSetHeapBlockItemptr(mmRevmapAccess *rmAccess, BlockNumber blk,
+ 					  BlockNumber blkno, OffsetNumber offno);
+ extern void mmGetHeapBlockItemptr(mmRevmapAccess *rmAccess, BlockNumber blk,
+ 					  ItemPointerData *iptr);
+ extern void mmRevmapTruncate(mmRevmapAccess *rmAccess,
+ 				 BlockNumber heapNumBlocks);
+ 
+ 
+ #endif   /* MINMAX_REVMAP_H */
*** /dev/null
--- b/src/include/access/minmax_tuple.h
***************
*** 0 ****
--- 1,79 ----
+ /*
+  * Declarations for dealing with MinMax-specific tuples.
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/include/access/minmax_tuple.h
+  */
+ #ifndef MINMAX_TUPLE_H
+ #define MINMAX_TUPLE_H
+ 
+ #include "access/tupdesc.h"
+ 
+ 
+ /*
+  * This struct is used to represent the indexed values for one column, within
+  * one page range.
+  */
+ typedef struct MMValues
+ {
+ 	Datum		min;
+ 	Datum		max;
+ 	bool		hasnulls;
+ 	bool		allnulls;
+ } MMValues;
+ 
+ /*
+  * This struct represents one index tuple, comprising the minimum and
+  * maximum values for all indexed columns, within one page range.
+  * The number of elements in the values array is determined by the accompanying
+  * tuple descriptor.
+  */
+ typedef struct DeformedMMTuple
+ {
+ 	bool		nvalues;		/* XXX unused */
+ 	MMValues	values[FLEXIBLE_ARRAY_MEMBER];
+ } DeformedMMTuple;
+ 
+ /*
+  * An on-disk minmax tuple.  This is possibly followed by a nulls bitmask, with
+  * room for natts*2 null bits; min and max Datum values for each column follow
+  * that.
+  */
+ typedef struct MMTuple
+ {
+ 	/* ---------------
+ 	 * mt_info is laid out in the following fashion:
+ 	 *
+ 	 * 7th (high) bit: has nulls
+ 	 * 6th bit: unused
+ 	 * 5th bit: unused
+ 	 * bits 4-0: offset of data
+ 	 * ---------------
+ 	 */
+ 	uint8		mt_info;
+ } MMTuple;
+ 
+ #define SizeOfMinMaxTuple	(offsetof(MMTuple, mt_info) + sizeof(uint8))
+ 
+ /*
+  * mt_info manipulation macros
+  */
+ #define MMIDX_OFFSET_MASK 0x1F
+ /* bit 0x20 is not used at present */
+ /* bit 0x40 is not used at present */
+ #define MMIDX_NULLS_MASK 0x80
+ 
+ #define MMTupleDataOffset(mmtup)	((Size) (((MMTuple *) (mmtup))->mt_info & MMIDX_OFFSET_MASK))
+ #define MMTupleHasNulls(mmtup)	(((((MMTuple *) (mmtup))->mt_info & MMIDX_NULLS_MASK)) != 0)
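+ 
+ /*
+  * Example (illustrative): mt_info = 0x88 has the nulls bit set and a data
+  * offset of 8, i.e. a null bitmask is present and the Datum data starts 8
+  * bytes into the tuple.
+  */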
+ 
+ 
+ extern TupleDesc minmax_get_descr(TupleDesc tupdesc);
+ extern MMTuple *minmax_form_tuple(TupleDesc idxDesc, TupleDesc diskDesc,
+ 				  DeformedMMTuple *tuple, Size *size);
+ extern void minmax_free_tuple(MMTuple *tuple);
+ extern DeformedMMTuple *minmax_deform_tuple(TupleDesc tupdesc, MMTuple *tuple);
+ 
+ #endif   /* MINMAX_TUPLE_H */
*** /dev/null
--- b/src/include/access/minmax_xlog.h
***************
*** 0 ****
--- 1,132 ----
+ /*-------------------------------------------------------------------------
+  *
+  * minmax_xlog.h
+  *	  POSTGRES MinMax access XLOG definitions.
+  *
+  *
+  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/access/minmax_xlog.h
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef MINMAX_XLOG_H
+ #define MINMAX_XLOG_H
+ 
+ #include "access/xlog.h"
+ #include "storage/bufpage.h"
+ #include "storage/itemptr.h"
+ #include "storage/relfilenode.h"
+ #include "utils/relcache.h"
+ 
+ 
+ /*
+  * WAL record definitions for minmax's WAL operations
+  *
+  * XLOG allows us to store some information in the high 4 bits of the log
+  * record's xl_info field.
+  */
+ #define XLOG_MINMAX_CREATE_INDEX	0x00
+ #define XLOG_MINMAX_INSERT			0x10
+ #define XLOG_MINMAX_BULKREMOVE		0x20
+ #define XLOG_MINMAX_REVMAP_SET		0x30
+ #define XLOG_MINMAX_METAPG_SET		0x40
+ #define XLOG_MINMAX_RMARRAY_SET		0x50
+ #define XLOG_MINMAX_INIT_RMPG		0x60
+ 
+ #define XLOG_MINMAX_OPMASK			0x70
+ /*
+  * When we insert the first item on a new page, we restore the entire page in
+  * redo.
+  */
+ #define XLOG_MINMAX_INIT_PAGE		0x80
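+ 
+ /*
+  * For example (illustrative): an insert into a freshly allocated regular
+  * page carries xl_info = XLOG_MINMAX_INSERT | XLOG_MINMAX_INIT_PAGE, which
+  * minmax_xlog_insert checks before initializing the page in redo.
+  */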
+ 
+ /* This is what we need to know about a minmax index create */
+ typedef struct xl_minmax_createidx
+ {
+ 	RelFileNode	node;
+ } xl_minmax_createidx;
+ #define SizeOfMinmaxCreateIdx	(offsetof(xl_minmax_createidx, node) + sizeof(RelFileNode))
+ 
+ /* All that we need to find a minmax tuple */
+ typedef struct xl_minmax_tid
+ {
+ 	RelFileNode	node;
+ 	ItemPointerData tid;
+ } xl_minmax_tid;
+ 
+ #define SizeOfMinmaxTid		(offsetof(xl_minmax_tid, tid) + SizeOfIptrData)
+ 
+ /* This is what we need to know about a minmax tuple insert */
+ typedef struct xl_minmax_insert
+ {
+ 	xl_minmax_tid	target;
+ 	bool			overwrite;
+ 	/* tuple data follows at end of struct */
+ } xl_minmax_insert;
+ 
+ #define SizeOfMinmaxInsert		(offsetof(xl_minmax_insert, overwrite) + sizeof(bool))
+ 
+ /* This is what we need to know about a bulk minmax tuple remove */
+ typedef struct xl_minmax_bulkremove
+ {
+ 	RelFileNode node;
+ 	BlockNumber	block;
+ 	/* offset number array follows at end of struct */
+ } xl_minmax_bulkremove;
+ 
+ #define SizeOfMinmaxBulkRemove	(offsetof(xl_minmax_bulkremove, block) + sizeof(BlockNumber))
+ 
+ /* This is what we need to know about a revmap "set heap ptr" */
+ typedef struct xl_minmax_rm_set
+ {
+ 	RelFileNode		node;
+ 	BlockNumber		mapBlock;
+ 	int				pagesPerRange;
+ 	BlockNumber		heapBlock;
+ 	ItemPointerData newval;
+ } xl_minmax_rm_set;
+ 
+ #define SizeOfMinmaxRevmapSet	(offsetof(xl_minmax_rm_set, newval) + SizeOfIptrData)
+ 
+ /* This is what we need to know about a "metapage set" operation */
+ typedef struct xl_minmax_metapg_set
+ {
+ 	RelFileNode		node;
+ 	uint32			blkidx;
+ 	BlockNumber		newpg;
+ } xl_minmax_metapg_set;
+ 
+ #define SizeOfMinmaxMetapgSet	(offsetof(xl_minmax_metapg_set, newpg) + \
+ 								 sizeof(BlockNumber))
+ 
+ /* This is what we need to know about a "revmap array set" operation */
+ typedef struct xl_minmax_rmarray_set
+ {
+ 	RelFileNode		node;
+ 	BlockNumber		rmarray;
+ 	uint32			blkidx;
+ 	BlockNumber		newpg;
+ } xl_minmax_rmarray_set;
+ 
+ #define SizeOfMinmaxRmarraySet	(offsetof(xl_minmax_rmarray_set, newpg) + \
+ 								 sizeof(BlockNumber))
+ 
+ /* This is what we need to know when we initialize a new revmap page */
+ typedef struct xl_minmax_init_rmpg
+ {
+ 	RelFileNode		node;
+ 	bool			array;	/* array revmap page or regular revmap page */
+ 	BlockNumber		blkno;
+ 	BlockNumber		logblk;	/* only used by regular revmap pages */
+ } xl_minmax_init_rmpg;
+ 
+ #define SizeOfMinmaxInitRmpg	(offsetof(xl_minmax_init_rmpg, blkno) + \
+ 								 sizeof(BlockNumber))
+ 
+ 
+ extern void minmax_desc(StringInfo buf, uint8 xl_info, char *rec);
+ extern void minmax_redo(XLogRecPtr lsn, XLogRecord *record);
+ 
+ #endif	/* MINMAX_XLOG_H */
*** a/src/include/access/reloptions.h
--- b/src/include/access/reloptions.h
***************
*** 45,52 **** typedef enum relopt_kind
  	RELOPT_KIND_TABLESPACE = (1 << 7),
  	RELOPT_KIND_SPGIST = (1 << 8),
  	RELOPT_KIND_VIEW = (1 << 9),
  	/* if you add a new kind, make sure you update "last_default" too */
! 	RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_VIEW,
  	/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
  	RELOPT_KIND_MAX = (1 << 30)
  } relopt_kind;
--- 45,53 ----
  	RELOPT_KIND_TABLESPACE = (1 << 7),
  	RELOPT_KIND_SPGIST = (1 << 8),
  	RELOPT_KIND_VIEW = (1 << 9),
+ 	RELOPT_KIND_MINMAX = (1 << 10),
  	/* if you add a new kind, make sure you update "last_default" too */
! 	RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_MINMAX,
  	/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
  	RELOPT_KIND_MAX = (1 << 30)
  } relopt_kind;
*** a/src/include/access/relscan.h
--- b/src/include/access/relscan.h
***************
*** 35,42 **** typedef struct HeapScanDescData
  	bool		rs_temp_snap;	/* unregister snapshot at scan end? */
  
  	/* state set up at initscan time */
! 	BlockNumber rs_nblocks;		/* number of blocks to scan */
  	BlockNumber rs_startblock;	/* block # to start at */
  	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
  	bool		rs_syncscan;	/* report location to syncscan logic? */
  
--- 35,44 ----
  	bool		rs_temp_snap;	/* unregister snapshot at scan end? */
  
  	/* state set up at initscan time */
! 	BlockNumber rs_nblocks;		/* total number of blocks in rel */
  	BlockNumber rs_startblock;	/* block # to start at */
+ 	BlockNumber	rs_initblock;	/* block # to treat as start of relation */
+ 	BlockNumber	rs_numblocks;	/* number of blocks to scan */
  	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
  	bool		rs_syncscan;	/* report location to syncscan logic? */
  
*** a/src/include/access/rmgrlist.h
--- b/src/include/access/rmgrlist.h
***************
*** 42,44 **** PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup
--- 42,45 ----
  PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup)
  PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL)
  PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup)
+ PG_RMGR(RM_MINMAX_ID, "MinMax", minmax_redo, minmax_desc, NULL, NULL)
*** a/src/include/catalog/index.h
--- b/src/include/catalog/index.h
***************
*** 97,102 **** extern double IndexBuildHeapScan(Relation heapRelation,
--- 97,110 ----
  				   bool allow_sync,
  				   IndexBuildCallback callback,
  				   void *callback_state);
+ extern double IndexBuildHeapRangeScan(Relation heapRelation,
+ 						Relation indexRelation,
+ 						IndexInfo *indexInfo,
+ 						bool allow_sync,
+ 						BlockNumber start_blockno,
+ 						BlockNumber numblocks,
+ 						IndexBuildCallback callback,
+ 						void *callback_state);
  
  extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
  
*** a/src/include/catalog/pg_am.h
--- b/src/include/catalog/pg_am.h
***************
*** 132,136 **** DESCR("GIN index access method");
--- 132,138 ----
  DATA(insert OID = 4000 (  spgist	0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
  DESCR("SP-GiST index access method");
  #define SPGIST_AM_OID 4000
+ DATA(insert OID = 3580 (  minmax	5 0 f f f f t t f t t f f 0 mminsert mmbeginscan - mmgetbitmap mmrescan mmendscan mmmarkpos mmrestrpos mmbuild mmbuildempty mmbulkdelete mmvacuumcleanup - mmcostestimate mmoptions ));
+ #define MINMAX_AM_OID 3580
  
  #endif   /* PG_AM_H */
*** a/src/include/catalog/pg_amop.h
--- b/src/include/catalog/pg_amop.h
***************
*** 845,848 **** DATA(insert (	3550	869 869 25 s	932 783 0 ));
--- 845,929 ----
  DATA(insert (	3550	869 869 26 s	933 783 0 ));
  DATA(insert (	3550	869 869 27 s	934 783 0 ));
  
+ /*
+  * MinMax int4_ops
+  */
+ DATA(insert (	4054     23   23 1 s	  97	3580 0 ));
+ DATA(insert (	4054     23   23 2 s	 523	3580 0 ));
+ DATA(insert (	4054     23   23 3 s	  96	3580 0 ));
+ DATA(insert (	4054     23   23 4 s	 525	3580 0 ));
+ DATA(insert (	4054     23   23 5 s	 521	3580 0 ));
+ 
+ /*
+  * MinMax numeric_ops
+  */
+ DATA(insert (	4055   1700 1700 1 s	1754	3580 0 ));
+ DATA(insert (	4055   1700 1700 2 s	1755	3580 0 ));
+ DATA(insert (	4055   1700 1700 3 s	1752	3580 0 ));
+ DATA(insert (	4055   1700 1700 4 s	1757	3580 0 ));
+ DATA(insert (	4055   1700 1700 5 s	1756	3580 0 ));
+ 
+ /*
+  * MinMax text_ops
+  */
+ DATA(insert (	4056     25   25 1 s	 664	3580 0 ));
+ DATA(insert (	4056     25   25 2 s	 665	3580 0 ));
+ DATA(insert (	4056     25   25 3 s	  98	3580 0 ));
+ DATA(insert (	4056     25   25 4 s	 667	3580 0 ));
+ DATA(insert (	4056     25   25 5 s	 666	3580 0 ));
+ 
+ /*
+  * MinMax time_ops
+  */
+ DATA(insert (	4057   1083 1083 1 s	1110	3580 0 ));
+ DATA(insert (	4057   1083 1083 2 s	1111	3580 0 ));
+ DATA(insert (	4057   1083 1083 3 s	1108	3580 0 ));
+ DATA(insert (	4057   1083 1083 4 s	1113	3580 0 ));
+ DATA(insert (	4057   1083 1083 5 s	1112	3580 0 ));
+ 
+ /*
+  * MinMax timetz_ops
+  */
+ DATA(insert (	4058   1266 1266 1 s	1552	3580 0 ));
+ DATA(insert (	4058   1266 1266 2 s	1553	3580 0 ));
+ DATA(insert (	4058   1266 1266 3 s	1550	3580 0 ));
+ DATA(insert (	4058   1266 1266 4 s	1555	3580 0 ));
+ DATA(insert (	4058   1266 1266 5 s	1554	3580 0 ));
+ 
+ /*
+  * MinMax timestamp_ops
+  */
+ DATA(insert (	4059   1114 1114 1 s	2062	3580 0 ));
+ DATA(insert (	4059   1114 1114 2 s	2063	3580 0 ));
+ DATA(insert (	4059   1114 1114 3 s	2060	3580 0 ));
+ DATA(insert (	4059   1114 1114 4 s	2065	3580 0 ));
+ DATA(insert (	4059   1114 1114 5 s	2064	3580 0 ));
+ 
+ /*
+  * MinMax timestamptz_ops
+  */
+ DATA(insert (	4060   1184 1184 1 s	1322	3580 0 ));
+ DATA(insert (	4060   1184 1184 2 s	1323	3580 0 ));
+ DATA(insert (	4060   1184 1184 3 s	1320	3580 0 ));
+ DATA(insert (	4060   1184 1184 4 s	1325	3580 0 ));
+ DATA(insert (	4060   1184 1184 5 s	1324	3580 0 ));
+ 
+ /*
+  * MinMax date_ops
+  */
+ DATA(insert (	4061   1082 1082 1 s	1095	3580 0 ));
+ DATA(insert (	4061   1082 1082 2 s	1096	3580 0 ));
+ DATA(insert (	4061   1082 1082 3 s	1093	3580 0 ));
+ DATA(insert (	4061   1082 1082 4 s	1098	3580 0 ));
+ DATA(insert (	4061   1082 1082 5 s	1097	3580 0 ));
+ 
+ /*
+  * MinMax char_ops
+  */
+ DATA(insert (	4062     18   18 1 s	 631	3580 0 ));
+ DATA(insert (	4062     18   18 2 s	 632	3580 0 ));
+ DATA(insert (	4062     18   18 3 s	  92	3580 0 ));
+ DATA(insert (	4062     18   18 4 s	 634	3580 0 ));
+ DATA(insert (	4062     18   18 5 s	 633	3580 0 ));
+ 
  #endif   /* PG_AMOP_H */
*** a/src/include/catalog/pg_opclass.h
--- b/src/include/catalog/pg_opclass.h
***************
*** 235,239 **** DATA(insert (	403		jsonb_ops			PGNSP PGUID 4033  3802 t 0 ));
--- 235,248 ----
  DATA(insert (	405		jsonb_ops			PGNSP PGUID 4034  3802 t 0 ));
  DATA(insert (	2742	jsonb_ops			PGNSP PGUID 4036  3802 t 25 ));
  DATA(insert (	2742	jsonb_path_ops		PGNSP PGUID 4037  3802 f 23 ));
+ DATA(insert (	3580	int4_ops			PGNSP PGUID 4054    23 t 0 ));
+ DATA(insert (	3580	numeric_ops			PGNSP PGUID 4055  1700 t 0 ));
+ DATA(insert (	3580	text_ops			PGNSP PGUID 4056    25 t 0 ));
+ DATA(insert (	3580	time_ops			PGNSP PGUID 4057  1083 t 0 ));
+ DATA(insert (	3580	timetz_ops			PGNSP PGUID 4058  1266 t 0 ));
+ DATA(insert (	3580	timestamp_ops		PGNSP PGUID 4059  1114 t 0 ));
+ DATA(insert (	3580	timestamptz_ops		PGNSP PGUID 4060  1184 t 0 ));
+ DATA(insert (	3580	date_ops			PGNSP PGUID 4061  1082 t 0 ));
+ DATA(insert (	3580	char_ops			PGNSP PGUID 4062    18 t 0 ));
  
  #endif   /* PG_OPCLASS_H */
*** a/src/include/catalog/pg_opfamily.h
--- b/src/include/catalog/pg_opfamily.h
***************
*** 157,160 **** DATA(insert OID = 4035 (	783		jsonb_ops		PGNSP PGUID ));
--- 157,170 ----
  DATA(insert OID = 4036 (	2742	jsonb_ops		PGNSP PGUID ));
  DATA(insert OID = 4037 (	2742	jsonb_path_ops	PGNSP PGUID ));
  
+ DATA(insert OID = 4054 (	3580	int4_ops		PGNSP PGUID ));
+ DATA(insert OID = 4055 (	3580	numeric_ops		PGNSP PGUID ));
+ DATA(insert OID = 4056 (	3580	text_ops		PGNSP PGUID ));
+ DATA(insert OID = 4057 (	3580	time_ops		PGNSP PGUID ));
+ DATA(insert OID = 4058 (	3580	timetz_ops		PGNSP PGUID ));
+ DATA(insert OID = 4059 (	3580	timestamp_ops	PGNSP PGUID ));
+ DATA(insert OID = 4060 (	3580	timestamptz_ops	PGNSP PGUID ));
+ DATA(insert OID = 4061 (	3580	date_ops		PGNSP PGUID ));
+ DATA(insert OID = 4062 (	3580	char_ops		PGNSP PGUID ));
+ 
  #endif   /* PG_OPFAMILY_H */
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 565,570 **** DESCR("btree(internal)");
--- 565,598 ----
  DATA(insert OID = 2785 (  btoptions		   PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  btoptions _null_ _null_ _null_ ));
  DESCR("btree(internal)");
  
+ DATA(insert OID = 3789 (  mmgetbitmap	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_	mmgetbitmap _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3790 (  mminsert		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	mminsert _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3791 (  mmbeginscan	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	mmbeginscan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3792 (  mmrescan		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmrescan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3793 (  mmendscan		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmendscan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3794 (  mmmarkpos		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmmarkpos _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3795 (  mmrestrpos		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmrestrpos _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3796 (  mmbuild		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ mmbuild _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3797 (  mmbuildempty	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmbuildempty _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3798 (  mmbulkdelete	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmbulkdelete _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3799 (  mmvacuumcleanup   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmvacuumcleanup _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3800 (  mmcostestimate   PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmcostestimate _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3801 (  mmoptions		   PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  mmoptions _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ 
+ 
  DATA(insert OID = 339 (  poly_same		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_same _null_ _null_ _null_ ));
  DATA(insert OID = 340 (  poly_contain	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_contain _null_ _null_ _null_ ));
  DATA(insert OID = 341 (  poly_left		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_left _null_ _null_ _null_ ));
*** a/src/include/storage/bufpage.h
--- b/src/include/storage/bufpage.h
***************
*** 393,398 **** extern void PageInit(Page page, Size pageSize, Size specialSize);
--- 393,400 ----
  extern bool PageIsVerified(Page page, BlockNumber blkno);
  extern OffsetNumber PageAddItem(Page page, Item item, Size size,
  			OffsetNumber offsetNumber, bool overwrite, bool is_heap);
+ extern void PageOverwriteItemData(Page page, OffsetNumber offset, Item item,
+ 					  Size size);
  extern Page PageGetTempPage(Page page);
  extern Page PageGetTempPageCopy(Page page);
  extern Page PageGetTempPageCopySpecial(Page page);
***************
*** 403,408 **** extern Size PageGetExactFreeSpace(Page page);
--- 405,412 ----
  extern Size PageGetHeapFreeSpace(Page page);
  extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
  extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
+ extern void PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos,
+ 						 int nitems);
  extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
  extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
  
*** a/src/include/utils/selfuncs.h
--- b/src/include/utils/selfuncs.h
***************
*** 195,200 **** extern Datum hashcostestimate(PG_FUNCTION_ARGS);
--- 195,201 ----
  extern Datum gistcostestimate(PG_FUNCTION_ARGS);
  extern Datum spgcostestimate(PG_FUNCTION_ARGS);
  extern Datum gincostestimate(PG_FUNCTION_ARGS);
+ extern Datum mmcostestimate(PG_FUNCTION_ARGS);
  
  /* Functions in array_selfuncs.c */
  
*** a/src/test/regress/expected/opr_sanity.out
--- b/src/test/regress/expected/opr_sanity.out
***************
*** 1547,1552 **** ORDER BY 1, 2, 3;
--- 1547,1557 ----
         2742 |            9 | ?
         2742 |           10 | ?|
         2742 |           11 | ?&
+        3580 |            1 | <
+        3580 |            2 | <=
+        3580 |            3 | =
+        3580 |            4 | >=
+        3580 |            5 | >
         4000 |            1 | <<
         4000 |            1 | ~<~
         4000 |            2 | &<
***************
*** 1569,1575 **** ORDER BY 1, 2, 3;
         4000 |           15 | >
         4000 |           16 | @>
         4000 |           18 | =
! (80 rows)
  
  -- Check that all opclass search operators have selectivity estimators.
  -- This is not absolutely required, but it seems a reasonable thing
--- 1574,1580 ----
         4000 |           15 | >
         4000 |           16 | @>
         4000 |           18 | =
! (85 rows)
  
  -- Check that all opclass search operators have selectivity estimators.
  -- This is not absolutely required, but it seems a reasonable thing
***************
*** 1731,1736 **** WHERE NOT (
--- 1736,1742 ----
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
+   -- MinMax has no support functions
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
***************
*** 1756,1762 **** WHERE NOT (
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
   amname | opcname | procnums 
  --------+---------+----------
--- 1762,1769 ----
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{NULL}'
  );
   amname | opcname | procnums 
  --------+---------+----------
*** a/src/test/regress/sql/opr_sanity.sql
--- b/src/test/regress/sql/opr_sanity.sql
***************
*** 1154,1159 **** WHERE NOT (
--- 1154,1160 ----
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
+   -- MinMax has no support functions
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
***************
*** 1177,1183 **** WHERE NOT (
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
  
  -- Unfortunately, we can't check the amproc link very well because the
--- 1178,1185 ----
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{NULL}'
  );
  
  -- Unfortunately, we can't check the amproc link very well because the
