*** a/contrib/pageinspect/Makefile
--- b/contrib/pageinspect/Makefile
***************
*** 1,7 ****
  # contrib/pageinspect/Makefile
  
  MODULE_big	= pageinspect
! OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o $(WIN32RES)
  
  EXTENSION = pageinspect
  DATA = pageinspect--1.2.sql pageinspect--1.0--1.1.sql \
--- 1,7 ----
  # contrib/pageinspect/Makefile
  
  MODULE_big	= pageinspect
! OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o mmfuncs.o $(WIN32RES)
  
  EXTENSION = pageinspect
  DATA = pageinspect--1.2.sql pageinspect--1.0--1.1.sql \
*** /dev/null
--- b/contrib/pageinspect/mmfuncs.c
***************
*** 0 ****
--- 1,407 ----
+ /*
+  * mmfuncs.c
+  *		Functions to investigate MinMax indexes
+  *
+  * Copyright (c) 2014, PostgreSQL Global Development Group
+  *
+  * IDENTIFICATION
+  *		contrib/pageinspect/mmfuncs.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
+ #include "catalog/index.h"
+ #include "catalog/pg_type.h"
+ #include "funcapi.h"
+ #include "lib/stringinfo.h"
+ #include "utils/array.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+ #include "utils/rel.h"
+ #include "miscadmin.h"
+ 
+ 
+ PG_FUNCTION_INFO_V1(minmax_page_type);
+ PG_FUNCTION_INFO_V1(minmax_page_items);
+ PG_FUNCTION_INFO_V1(minmax_metapage_info);
+ PG_FUNCTION_INFO_V1(minmax_revmap_data);
+ 
+ typedef struct mm_column_state
+ {
+ 	int			nstored;
+ 	FmgrInfo	outputFn[FLEXIBLE_ARRAY_MEMBER];
+ } mm_column_state;
+ 
+ typedef struct mm_page_state
+ {
+ 	MinmaxDesc *mmdesc;
+ 	Page		page;
+ 	OffsetNumber offset;
+ 	bool		unusedItem;
+ 	bool		done;
+ 	AttrNumber	attno;
+ 	DeformedMMTuple *dtup;
+ 	mm_column_state *columns[FLEXIBLE_ARRAY_MEMBER];
+ } mm_page_state;
+ 
+ 
+ static Page verify_minmax_page(bytea *raw_page, uint16 type,
+ 				 const char *strtype);
+ 
+ Datum
+ minmax_page_type(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page = VARDATA(raw_page);
+ 	MinmaxSpecialSpace *special;
+ 	char *type;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	switch (special->type)
+ 	{
+ 		case MINMAX_PAGETYPE_META:
+ 			type = "meta";
+ 			break;
+ 		case MINMAX_PAGETYPE_REVMAP:
+ 			type = "revmap";
+ 			break;
+ 		case MINMAX_PAGETYPE_REGULAR:
+ 			type = "regular";
+ 			break;
+ 		default:
+ 			type = psprintf("unknown (%02x)", special->type);
+ 			break;
+ 	}
+ 
+ 	PG_RETURN_TEXT_P(cstring_to_text(type));
+ }
+ 
+ /*
+  * Verify that the given bytea contains a minmax page of the indicated page
+  * type, or die in the attempt.  A pointer to the page is returned.
+  */
+ static Page
+ verify_minmax_page(bytea *raw_page, uint16 type, const char *strtype)
+ {
+ 	Page	page;
+ 	int		raw_page_size;
+ 	MinmaxSpecialSpace *special;
+ 
+ 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+ 
+ 	if (raw_page_size < SizeOfPageHeaderData)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("input page too small"),
+ 				 errdetail("Expected size %d, got %d", BLCKSZ, raw_page_size)));
+ 
+ 	page = VARDATA(raw_page);
+ 
+ 	/* verify the special space says this page is what we want */
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	if (special->type != type)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ 				 errmsg("page is not a Minmax page of type \"%s\"", strtype),
+ 				 errdetail("Expected special type %08x, got %08x.",
+ 						   type, special->type)));
+ 
+ 	return page;
+ }
+ 
+ 
+ /*
+  * Extract all item values from a minmax index page
+  *
+  * Usage: SELECT * FROM minmax_page_items(get_raw_page('idx', 1), 'idx'::regclass);
+  */
+ Datum
+ minmax_page_items(PG_FUNCTION_ARGS)
+ {
+ 	mm_page_state *state;
+ 	FuncCallContext *fctx;
+ 
+ 	if (!superuser())
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ 				 (errmsg("must be superuser to use raw page functions"))));
+ 
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 		Oid			indexRelid = PG_GETARG_OID(1);
+ 		Page		page;
+ 		TupleDesc	tupdesc;
+ 		MemoryContext mctx;
+ 		Relation	indexRel;
+ 		AttrNumber	attno;
+ 
+ 		/* minimally verify the page we got */
+ 		page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_REGULAR, "regular");
+ 
+ 		/* create a function context for cross-call persistence */
+ 		fctx = SRF_FIRSTCALL_INIT();
+ 
+ 		/* switch to memory context appropriate for multiple function calls */
+ 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+ 
+ 		/* Build a tuple descriptor for our result type */
+ 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 			elog(ERROR, "return type must be a row type");
+ 
+ 		indexRel = index_open(indexRelid, AccessShareLock);
+ 
+ 		state = palloc(offsetof(mm_page_state, columns) +
+ 					   sizeof(mm_column_state) * RelationGetDescr(indexRel)->natts);
+ 
+ 		state->mmdesc = minmax_build_mmdesc(indexRel);
+ 		state->page = page;
+ 		state->offset = FirstOffsetNumber;
+ 		state->unusedItem = false;
+ 		state->done = false;
+ 		state->dtup = NULL;
+ 
+ 		for (attno = 1; attno <= state->mmdesc->md_tupdesc->natts; attno++)
+ 		{
+ 			Oid		output;
+ 			bool	isVarlena;
+ 			FmgrInfo *opcInfoFn;
+ 			MinmaxOpcInfo *opcinfo;
+ 			int		i;
+ 			mm_column_state *column;
+ 
+ 			opcInfoFn = index_getprocinfo(indexRel, attno, MINMAX_PROCNUM_OPCINFO);
+ 			opcinfo = (MinmaxOpcInfo *)
+ 				DatumGetPointer(FunctionCall1(opcInfoFn, InvalidOid));
+ 
+ 			column = palloc(offsetof(mm_column_state, outputFn) +
+ 							sizeof(FmgrInfo) * opcinfo->oi_nstored);
+ 
+ 			column->nstored = opcinfo->oi_nstored;
+ 			for (i = 0; i < opcinfo->oi_nstored; i++)
+ 			{
+ 				getTypeOutputInfo(opcinfo->oi_typids[i], &output, &isVarlena);
+ 				fmgr_info(output, &column->outputFn[i]);
+ 			}
+ 
+ 			state->columns[attno - 1] = column;
+ 		}
+ 
+ 		index_close(indexRel, AccessShareLock);
+ 
+ 		fctx->user_fctx = state;
+ 		fctx->tuple_desc = BlessTupleDesc(tupdesc);
+ 
+ 		MemoryContextSwitchTo(mctx);
+ 	}
+ 
+ 	fctx = SRF_PERCALL_SETUP();
+ 	state = fctx->user_fctx;
+ 
+ 	if (!state->done)
+ 	{
+ 		HeapTuple	result;
+ 		Datum		values[5];
+ 		bool		nulls[5];
+ 
+ 		/*
+ 		 * This loop is called once for every attribute of every tuple in the
+ 		 * page.  At the start of a tuple, we get a NULL dtup; that's our
+ 		 * signal for obtaining and decoding the next one.  If that's not the
+ 		 * case, we output the next attribute.
+ 		 */
+ 		if (state->dtup == NULL)
+ 		{
+ 			MMTuple	   *tup;
+ 			MemoryContext mctx;
+ 			ItemId		itemId;
+ 
+ 			/* deformed tuple must live across calls */
+ 			mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+ 
+ 			/* verify item status: if there's no data, we can't decode */
+ 			itemId = PageGetItemId(state->page, state->offset);
+ 			if (ItemIdIsUsed(itemId))
+ 			{
+ 				tup = (MMTuple *) PageGetItem(state->page,
+ 											  PageGetItemId(state->page,
+ 															state->offset));
+ 				state->dtup = minmax_deform_tuple(state->mmdesc, tup);
+ 				state->attno = 1;
+ 				state->unusedItem = false;
+ 			}
+ 			else
+ 				state->unusedItem = true;
+ 
+ 			MemoryContextSwitchTo(mctx);
+ 		}
+ 		else
+ 			state->attno++;
+ 
+ 		MemSet(nulls, 0, sizeof(nulls));
+ 
+ 		if (state->unusedItem)
+ 		{
+ 			values[0] = UInt16GetDatum(state->offset);
+ 			nulls[1] = true;
+ 			nulls[2] = true;
+ 			nulls[3] = true;
+ 			nulls[4] = true;
+ 		}
+ 		else
+ 		{
+ 			int		att = state->attno - 1;
+ 
+ 			values[0] = UInt16GetDatum(state->offset);
+ 			values[1] = UInt16GetDatum(state->attno);
+ 			values[2] = BoolGetDatum(state->dtup->dt_columns[att].allnulls);
+ 			values[3] = BoolGetDatum(state->dtup->dt_columns[att].hasnulls);
+ 			if (!state->dtup->dt_columns[att].allnulls)
+ 			{
+ 				MMValues   *mmvalues = &state->dtup->dt_columns[att];
+ 				StringInfoData	s;
+ 				bool		first;
+ 				int			i;
+ 
+ 				initStringInfo(&s);
+ 				appendStringInfoChar(&s, '{');
+ 
+ 				first = true;
+ 				for (i = 0; i < state->columns[att]->nstored; i++)
+ 				{
+ 					char   *val;
+ 
+ 					if (!first)
+ 						appendStringInfoString(&s, " .. ");
+ 					first = false;
+ 					val = OutputFunctionCall(&state->columns[att]->outputFn[i],
+ 											 mmvalues->values[i]);
+ 					appendStringInfoString(&s, val);
+ 					pfree(val);
+ 				}
+ 				appendStringInfoChar(&s, '}');
+ 
+ 				values[4] = CStringGetTextDatum(s.data);
+ 				pfree(s.data);
+ 			}
+ 			else
+ 			{
+ 				nulls[4] = true;
+ 			}
+ 		}
+ 
+ 		result = heap_form_tuple(fctx->tuple_desc, values, nulls);
+ 
+ 		/*
+ 		 * If the item was unused, jump straight to the next one; otherwise,
+ 		 * the only cleanup needed here is to set our signal to go to the next
+ 		 * tuple in the following iteration, by freeing the current one.
+ 		 */
+ 		if (state->unusedItem)
+ 			state->offset = OffsetNumberNext(state->offset);
+ 		else if (state->attno >= state->mmdesc->md_tupdesc->natts)
+ 		{
+ 			pfree(state->dtup);
+ 			state->dtup = NULL;
+ 			state->offset = OffsetNumberNext(state->offset);
+ 		}
+ 
+ 		/*
+ 		 * If we're beyond the end of the page, set flag to end the function in
+ 		 * the following iteration.
+ 		 */
+ 		if (state->offset > PageGetMaxOffsetNumber(state->page))
+ 			state->done = true;
+ 
+ 		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(result));
+ 	}
+ 
+ 	minmax_free_mmdesc(state->mmdesc);
+ 
+ 	SRF_RETURN_DONE(fctx);
+ }
+ 
+ Datum
+ minmax_metapage_info(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page;
+ 	MinmaxMetaPageData *meta;
+ 	TupleDesc	tupdesc;
+ 	Datum		values[4];
+ 	bool		nulls[4];
+ 	HeapTuple	htup;
+ 
+ 	page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_META, "metapage");
+ 
+ 	/* Build a tuple descriptor for our result type */
+ 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 		elog(ERROR, "return type must be a row type");
+ 	tupdesc = BlessTupleDesc(tupdesc);
+ 
+ 	/* Extract values from the metapage */
+ 	meta = (MinmaxMetaPageData *) PageGetContents(page);
+ 	MemSet(nulls, 0, sizeof(nulls));
+ 	values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->minmaxMagic));
+ 	values[1] = Int32GetDatum(meta->minmaxVersion);
+ 	values[2] = Int32GetDatum(meta->pagesPerRange);
+ 	values[3] = Int64GetDatum(meta->lastRevmapPage);
+ 
+ 	htup = heap_form_tuple(tupdesc, values, nulls);
+ 
+ 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
+ }
+ 
+ /*
+  * Return the TID array stored in a minmax revmap page
+  */
+ Datum
+ minmax_revmap_data(PG_FUNCTION_ARGS)
+ {
+ 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+ 	Page		page;
+ 	RevmapContents *contents;
+ 	TupleDesc	tupdesc;
+ 	Datum		values[2];
+ 	bool		nulls[2];
+ 	HeapTuple	htup;
+ 	ArrayBuildState *astate = NULL;
+ 	int			i;
+ 
+ 	page = verify_minmax_page(raw_page, MINMAX_PAGETYPE_REVMAP, "revmap");
+ 
+ 	/* Build a tuple descriptor for our result type */
+ 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ 		elog(ERROR, "return type must be a row type");
+ 	tupdesc = BlessTupleDesc(tupdesc);
+ 
+ 	/* Extract values from the revmap page */
+ 	contents = (RevmapContents *) PageGetContents(page);
+ 	MemSet(nulls, 0, sizeof(nulls));
+ 	values[0] = Int64GetDatum((uint64) 0);
+ 
+ 	/* Extract (possibly empty) list of TIDs in this page. */
+ 	for (i = 0; i < REVMAP_PAGE_MAXITEMS; i++)
+ 	{
+ 		ItemPointer	tid;
+ 
+ 		tid = &contents->rmr_tids[i];
+ 		astate = accumArrayResult(astate,
+ 								  PointerGetDatum(tid),
+ 								  false, TIDOID, CurrentMemoryContext);
+ 	}
+ 	if (astate == NULL)
+ 		nulls[1] = true;
+ 	else
+ 		values[1] = makeArrayResult(astate, CurrentMemoryContext);
+ 
+ 	htup = heap_form_tuple(tupdesc, values, nulls);
+ 
+ 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
+ }
*** a/contrib/pageinspect/pageinspect--1.2.sql
--- b/contrib/pageinspect/pageinspect--1.2.sql
***************
*** 99,104 **** AS 'MODULE_PATHNAME', 'bt_page_items'
--- 99,140 ----
  LANGUAGE C STRICT;
  
  --
+ -- minmax_page_type()
+ --
+ CREATE FUNCTION minmax_page_type(IN page bytea)
+ RETURNS text
+ AS 'MODULE_PATHNAME', 'minmax_page_type'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_metapage_info()
+ --
+ CREATE FUNCTION minmax_metapage_info(IN page bytea, OUT magic text,
+ 	OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint)
+ AS 'MODULE_PATHNAME', 'minmax_metapage_info'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_page_items()
+ --
+ CREATE FUNCTION minmax_page_items(IN page bytea, IN index_oid oid,
+ 	OUT itemoffset int,
+ 	OUT attnum int,
+ 	OUT allnulls bool,
+ 	OUT hasnulls bool,
+ 	OUT value text)
+ RETURNS SETOF record
+ AS 'MODULE_PATHNAME', 'minmax_page_items'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- minmax_revmap_data()
+ CREATE FUNCTION minmax_revmap_data(IN page bytea,
+ 	OUT pages tid[])
+ AS 'MODULE_PATHNAME', 'minmax_revmap_data'
+ LANGUAGE C STRICT;
+ 
+ --
  -- fsm_page_contents()
  --
  CREATE FUNCTION fsm_page_contents(IN page bytea)
*** a/contrib/pg_xlogdump/rmgrdesc.c
--- b/contrib/pg_xlogdump/rmgrdesc.c
***************
*** 13,18 ****
--- 13,19 ----
  #include "access/gist_private.h"
  #include "access/hash.h"
  #include "access/heapam_xlog.h"
+ #include "access/minmax_xlog.h"
  #include "access/multixact.h"
  #include "access/nbtree.h"
  #include "access/rmgr.h"
*** /dev/null
--- b/doc/src/sgml/brin.sgml
***************
*** 0 ****
--- 1,248 ----
+ <!-- doc/src/sgml/brin.sgml -->
+ 
+ <chapter id="BRIN">
+ <title>BRIN Indexes</title>
+ 
+    <indexterm>
+     <primary>index</primary>
+     <secondary>BRIN</secondary>
+    </indexterm>
+ 
+ <sect1 id="brin-intro">
+  <title>Introduction</title>
+ 
+  <para>
+   <acronym>BRIN</acronym> stands for Block Range Index.
+   <acronym>BRIN</acronym> is designed for handling very large tables
+   in which certain columns have some natural correlation with its
+   physical position.  For example, a table storing orders might have
+   a date column on which each order was placed, and much of the time
+   the earlier entries will appear earlier in the table as well; or a
+   table storing a ZIP code column might have all codes for a city
+   grouped together naturally.  For each block range, some summary info
+   is stored in the index.
+  </para>
+ 
+  <para>
+   <acronym>BRIN</acronym> indexes can satisfy queries via the bitmap
+   scanning facility only, and will return all tuples in all pages within
+   each range if the summary info stored by the index indicates that some
+   tuples in the range might match the given query conditions.  The executor
+   is in charge of rechecking these tuples and discarding those that do not 
+   match &mdash; in other words, these indexes are lossy.
+   This enables them to work as very fast sequential scan helpers to avoid
+   scanning blocks that are known not to contain matching tuples.
+  </para>
+ 
+  <para>
+   The specific data that a <acronym>BRIN</acronym> index will store
+   depends on the operator class selected for the data type.
+   Datatypes having a linear sort order can have operator classes that
+   store the minimum and maximum value within each block range, for instance;
+   geometrical types might store the common bounding box.
+  </para>
+   
+  <para>
+   The size of the block range is determined at index creation time with
+   the pages_per_range storage parameter.  The smaller the number, the
+   larger the index becomes (because of the need to store more index entries),
+   but at the same time the summary data stored can be more precise and
+   more data blocks can be skipped.
+  </para>
+ 
+  <para>
+   The <acronym>BRIN</acronym> implementation in <productname>PostgreSQL</productname>
+   is primarily maintained by &Aacute;lvaro Herrera.
+  </para>
+ </sect1>
+ 
+ <sect1 id="brin-builtin-opclasses">
+  <title>Built-in Operator Classes</title>
+ 
+  <para>
+   The core <productname>PostgreSQL</productname> distribution
+   includes the <acronym>BRIN</acronym> operator classes shown in
+   <xref linkend="brin-builtin-opclasses-table">.
+  </para>
+ 
+  <table id="brin-builtin-opclasses-table">
+   <title>Built-in <acronym>BRIN</acronym> Operator Classes</title>
+   <tgroup cols="3">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Indexed Data Type</entry>
+      <entry>Indexable Operators</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry><literal>char_minmax_ops</literal></entry>
+      <entry><type>"char"</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>date_minmax_ops</literal></entry>
+      <entry><type>date</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>int4_minmax_ops</literal></entry>
+      <entry><type>integer</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>numeric_minmax_ops</literal></entry>
+      <entry><type>numeric</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>text_minmax_ops</literal></entry>
+      <entry><type>text</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>time_minmax_ops</literal></entry>
+      <entry><type>time</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>timetz_minmax_ops</literal></entry>
+      <entry><type>time with time zone</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>timestamp_minmax_ops</literal></entry>
+      <entry><type>timestamp</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>timestamptz_minmax_ops</literal></entry>
+      <entry><type>timestamp with time zone</type></entry>
+      <entry>
+       <literal>&lt;</literal>
+       <literal>&lt;=</literal>
+       <literal>=</literal>
+       <literal>&gt;=</literal>
+       <literal>&gt;</literal>
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+ 
+ <sect1 id="brin-extensibility">
+  <title>Extensibility</title>
+ 
+  <para>
+   The <acronym>BRIN</acronym> interface has a high level of abstraction,
+   requiring the access method implementer only to implement the semantics
+   of the data type being accessed.  The <acronym>BRIN</acronym> layer
+   itself takes care of concurrency, logging and searching the index structure.
+  </para>
+ 
+  <para>
+   All it takes to get a <acronym>BRIN</acronym> access method working is to
+   implement a few user-defined methods, which define the behavior of
+   summary values stored in the index and the way they interact with
+   scan keys.
+   In short, <acronym>BRIN</acronym> combines
+   extensibility with generality, code reuse, and a clean interface.
+  </para>
+ 
+  <para>
+   There are three methods that an operator class for <acronym>BRIN</acronym>
+   must provide:
+ 
+   <variablelist>
+    <varlistentry>
+     <term><function>Datum opcInfo(...)</></term>
+     <listitem>
+      <para>
+       Returns internal information about the summary data stored
+       about indexed columns.
+      </para>
+     </listitem>
+    </varlistentry>
+ 
+    <varlistentry>
+     <term><function>bool consistent(...)</function></term>
+     <listitem>
+      <para>
+       Returns whether the key is consistent with the given index tuple.
+      </para>
+     </listitem>
+    </varlistentry>
+ 
+    <varlistentry>
+     <term><function>bool addValue(...)</function></term>
+      <listitem>
+       <para>
+        Modifies the index tuple to make it consistent with the given
+        indexed data.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+ 
+ <!-- this needs improvement ... -->
+   To implement these methods in a generic way, the opclass normally
+   defines its own internal support functions.  For instance, minmax
+   opclasses add the support functions for the four inequality operators
+   for the datatype.
+   Additionally, the operator class must supply appropriate
+   operator entries,
+   to enable the optimizer to use the index when those operators are
+   used in queries.
+  </para>
+ </sect1>
+ </chapter>
*** a/doc/src/sgml/filelist.sgml
--- b/doc/src/sgml/filelist.sgml
***************
*** 87,92 ****
--- 87,93 ----
  <!ENTITY gist       SYSTEM "gist.sgml">
  <!ENTITY spgist     SYSTEM "spgist.sgml">
  <!ENTITY gin        SYSTEM "gin.sgml">
+ <!ENTITY brin       SYSTEM "brin.sgml">
  <!ENTITY planstats    SYSTEM "planstats.sgml">
  <!ENTITY indexam    SYSTEM "indexam.sgml">
  <!ENTITY nls        SYSTEM "nls.sgml">
*** a/doc/src/sgml/indices.sgml
--- b/doc/src/sgml/indices.sgml
***************
*** 116,122 **** CREATE INDEX test1_id_index ON test1 (id);
  
    <para>
     <productname>PostgreSQL</productname> provides several index types:
!    B-tree, Hash, GiST, SP-GiST and GIN.  Each index type uses a different
     algorithm that is best suited to different types of queries.
     By default, the <command>CREATE INDEX</command> command creates
     B-tree indexes, which fit the most common situations.
--- 116,123 ----
  
    <para>
     <productname>PostgreSQL</productname> provides several index types:
!    B-tree, Hash, GiST, SP-GiST, GIN and BRIN.
!    Each index type uses a different
     algorithm that is best suited to different types of queries.
     By default, the <command>CREATE INDEX</command> command creates
     B-tree indexes, which fit the most common situations.
***************
*** 326,331 **** SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
--- 327,365 ----
     classes are available in the <literal>contrib</> collection or as separate
     projects.  For more information see <xref linkend="GIN">.
    </para>
+ 
+   <para>
+    <indexterm>
+     <primary>index</primary>
+     <secondary>BRIN</secondary>
+    </indexterm>
+    <indexterm>
+     <primary>BRIN</primary>
+     <see>index</see>
+    </indexterm>
+    BRIN indexes (a shorthand for Block Range indexes)
+    store summaries about the values stored in consecutive table physical block ranges.
+    Like GiST, SP-GiST and GIN,
+    BRIN can support many different indexing strategies,
+    and the particular operators with which a BRIN index can be used
+    vary depending on the indexing strategy.
+    For datatypes that have a linear sort order, the indexed data
+    corresponds to the minimum and maximum values of the
+    values in the column for each block range,
+    which support indexed queries using these operators:
+ 
+    <simplelist>
+     <member><literal>&lt;</literal></member>
+     <member><literal>&lt;=</literal></member>
+     <member><literal>=</literal></member>
+     <member><literal>&gt;=</literal></member>
+     <member><literal>&gt;</literal></member>
+    </simplelist>
+ 
+    The BRIN operator classes included in the standard distribution are
+    documented in <xref linkend="brin-builtin-opclasses-table">.
+    For more information see <xref linkend="BRIN">.
+   </para>
   </sect1>
  
  
*** a/doc/src/sgml/postgres.sgml
--- b/doc/src/sgml/postgres.sgml
***************
*** 247,252 ****
--- 247,253 ----
    &gist;
    &spgist;
    &gin;
+   &brin;
    &storage;
    &bki;
    &planstats;
*** /dev/null
--- b/minmax-proposal
***************
*** 0 ****
--- 1,306 ----
+ Minmax Range Indexes
+ ====================
+ 
+ Minmax indexes are a new access method intended to enable very fast scanning of
+ extremely large tables.
+ 
+ The essential idea of a minmax index is to keep track of summarizing values in
+ consecutive groups of heap pages (page ranges); for example, the minimum and
+ maximum values for datatypes with a btree opclass, or the bounding box for
+ geometric types.  These values can be used by constraint exclusion to avoid
+ scanning such pages, depending on query quals.
+ 
+ The main drawback of this is having to update the stored summary values of each
+ page range as tuples are inserted into them.
+ 
+ Other database systems already have similar features. Some examples:
+ 
+ * Oracle Exadata calls this "storage indexes"
+   http://richardfoote.wordpress.com/category/storage-indexes/
+ 
+ * Netezza has "zone maps"
+   http://nztips.com/2010/11/netezza-integer-join-keys/
+ 
+ * Infobright has this automatically within their "data packs" according to a
+   May 3rd, 2009 blog post
+   http://www.infobright.org/index.php/organizing_data_and_more_about_rough_data_contest/
+ 
+ * MonetDB also uses this technique, according to a published paper
+   http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.108.2662
+   "Cooperative Scans: Dynamic Bandwidth Sharing in a DBMS"
+ 
+ Index creation
+ --------------
+ 
+ To create a minmax index, we use the standard wording:
+ 
+   CREATE INDEX foo_minmax_idx ON foo USING MINMAX (a, b, e);
+ 
+ Partial indexes are not supported currently; since an index is concerned with
+ summary values of the involved columns across all the pages in the table, it
+ normally doesn't make sense to exclude some tuples.  These might be useful if
+ the index predicates are also used in queries.  We exclude these for now for
+ conceptual simplicity.
+ 
+ Expressional indexes can probably be supported in the future, but we disallow
+ them initially for conceptual simplicity.
+ 
+ Having multiple minmax indexes in the same table is acceptable, though most of
+ the time it would make more sense to have a single index covering all the
+ interesting columns.  Multiple indexes might be useful for columns added later.
+ 
+ Access Method Design
+ --------------------
+ 
+ Since item pointers are not stored inside indexes of this type, it is not
+ possible to support the amgettuple interface.  Instead, we only provide
+ amgetbitmap support; scanning a relation using this index requires a recheck
+ node on top.  The amgetbitmap routine returns a TIDBitmap comprising all pages
+ in those page groups that match the query qualifications.  The recheck node
+ prunes tuples that are not visible according to the query qualifications.
+ 
+ For each supported datatype, we need an operator class with the following
+ catalog entries:
+ 
+ - support operators (pg_amop): same as btree (<, <=, =, >=, >)
+ - support procedures (pg_amproc):
+   * "opcinfo" (procno 1) initializes a structure for index creation or scanning
+   * "addValue" (procno 2) takes an index tuple and a heap item, and possibly
+     changes the index tuple so that it includes the heap item values
+   * "consistent" (procno 3) takes an index tuple and query quals, and returns
+     whether the index tuple values match the query quals.
+ 
+ These are used pervasively:
+ 
+ - The optimizer requires them to evaluate queries, so that the index is chosen
+   when queries on the indexed table are planned.
+ - During index construction (ambuild), they are used to determine the boundary
+   values for each page range.
+ - During index updates (aminsert), they are used to determine whether the new
+   heap tuple matches the existing index tuple; and if not, they are used to
+   construct the new index tuple.
+ 
+ In each index tuple (corresponding to one page range), we store:
+ - for each indexed column of a datatype with a btree-opclass:
+   * minimum value across all tuples in the range
+   * maximum value across all tuples in the range
+   * are there nulls present in any tuple?
+   * are null all the values in all tuples in the range?
+ 
+ Different datatypes store other values instead of min/max, for example
+ geometric types might store a bounding box.   The NULL bits are always present.
+ 
+ These null bits are stored in a single null bitmask of length 2x number of
+ columns.
+ 
+ With the default INDEX_MAX_KEYS of 32, and considering columns of 8-byte length
+ types such as timestamptz or bigint, each tuple would be 522 bytes in length,
+ which seems reasonable.  There are 6 extra bytes for padding between the null
+ bitmask and the first data item, assuming 64-bit alignment; so the total size
+ for such an index would actually be 528 bytes.
+ 
+ This maximum index tuple size is calculated as: mt_info (2 bytes) + null bitmap
+ (8 bytes) + data value (8 bytes) * 32 * 2
+ 
+ (Of course, larger columns are possible, such as varchar, but creating minmax
+ indexes on such columns seems of little practical usefulness.  Also, the
+ usefulness of an index containing so many columns is dubious.)
+ 
+ There can be gaps where some pages have no covering index entry.
+ 
+ The Range Reverse Map
+ ---------------------
+ 
+ To find out the index tuple for a particular page range, we have an internal
+ structure we call the range reverse map.  This stores one TID per page range,
+ which is the address of the index tuple summarizing that range.  Since these
+ map entries are fixed size, it is possible to compute the address of the range
+ map entry for any given heap page by simple arithmetic.
+ 
+ When a new heap tuple is inserted in a summarized page range, we compare the
+ existing index tuple with the new heap tuple.  If the heap tuple is outside the
+ summarization data given by the index tuple for any indexed column (or if the
+ new heap tuple contains null values but the index tuple indicate there are no
+ nulls), it is necessary to create a new index tuple with the new values.  To do
+ this, a new index tuple is inserted, and the reverse range map is updated to
+ point to it.  The old index tuple is left in place, for later garbage
+ collection.  As an optimization, we sometimes overwrite the old index tuple in
+ place with the new data, which avoids the need for later garbage collection.
+ 
+ If the reverse range map points to an invalid TID, the corresponding page range
+ is considered to be not summarized.
+ 
+ To scan a table following a minmax index, we scan the reverse range map
+ sequentially.  This yields index tuples in ascending page range order.  Query
+ quals are matched to each index tuple; if they match, each page within the page
+ range is returned as part of the output TID bitmap.  If there's no match, they
+ are skipped.  Reverse range map entries returning invalid index TIDs, that is
+ unsummarized page ranges, are also returned in the TID bitmap.
+ 
+ To store the range reverse map, we map its logical page numbers to physical
+ pages.  We use a large two-level BlockNumber array for this: The metapage
+ contains an array of BlockNumbers; each of these points to a "revmap array
+ page".  Each revmap array page contains BlockNumbers, which in turn point to
+ "revmap regular pages", which are the ones that contain the revmap data itself.
+ Therefore, to find a given index tuple, we need to examine the metapage and
+ obtain the revmap array page number; then read the array page.  From there we
+ obtain the revmap regular page number, and that one contains the TID we're
+ interested in.  As an optimization, regular revmap page number 0 is stored in
+ physical page number 1, that is, the page just after the metapage.  This means
+ that scanning a table of about 1300 page ranges (the number of TIDs that fit in
+ a single 8kB page) does not require accessing the metapage at all.
+ 
+ When tuples are added to unsummarized pages, nothing needs to happen.
+ 
+ Heap tuples can be removed from anywhere without restriction.  It might be
+ useful to mark the corresponding index tuple somehow, if the heap tuple is one
+ of the constraining values of the summary data (i.e. either min or max in the
+ case of a btree-opclass-bearing datatype), so that in the future we are aware
+ of the need to re-execute summarization on that range, leading to a possible
+ tightening of the summary values.
+ 
+ Index entries that are not referenced from the revmap can be removed from the
+ main fork.  This currently happens at amvacuumcleanup, though it could be
+ carried out separately; no heap scan is necessary to determine which tuples
+ are unreachable.
+ 
+ Summarization
+ -------------
+ 
+ At index creation time, the whole table is scanned; for each page range the
+ summarizing values of each indexed column and nulls bitmap are collected and
+ stored in the index.
+ 
+ Once in a while, it is necessary to summarize a bunch of unsummarized pages
+ (because the table has grown since the index was created), or re-summarize a
+ range that has been marked invalid.  This is simple: scan the page range
+ calculating the summary values for each indexed column, then insert the new
+ index entry at the end of the index.
+ 
+ The easiest way to go about this seems to be to have vacuum do it.  That way we can
+ simply do re-summarization on the amvacuumcleanup routine.  Other answers would
+ mean we need a separate AM routine, which appears unwarranted at this stage.
+ 
+ Vacuuming
+ ---------
+ 
+ Vacuuming a table that has a minmax index does not represent a significant
+ challenge.  Since no heap TIDs are stored, it's not necessary to scan the index
+ when heap tuples are removed.  It might be that some min() value can be
+ incremented, or some max() value can be decremented; but this would represent
+ an optimization opportunity only, not a correctness issue.  Perhaps it's
+ simpler to represent this as the need to re-run summarization on the affected
+ page range.
+ 
+ Note that if there are no indexes on the table other than the minmax index,
+ usage of maintenance_work_mem by vacuum can be decreased significantly, because
+ no detailed index scan needs to take place (and thus it's not necessary for
+ vacuum to save TIDs to remove).  This optimization opportunity is best left for
+ future improvement.
+ 
+ Locking considerations
+ ----------------------
+ 
+ To read the TID during an index scan, we follow this protocol:
+ 
+ * read revmap page
+ * obtain share lock on the revmap buffer
+ * read the TID
+ * obtain share lock on buffer of main fork
+ * LockTuple the TID (using the index as relation).  A shared lock is
+   sufficient.  We need the LockTuple to prevent VACUUM from recycling
+   the index tuple; see below.
+ * release revmap buffer lock
+ * read the index tuple
+ * release the tuple lock
+ * release main fork buffer lock
+ 
+ 
+ To update the summary tuple for a page range, we use this protocol:
+ 
+ * insert a new index tuple somewhere in the main fork; note its TID
+ * read revmap page
+ * obtain exclusive lock on revmap buffer
+ * write the TID
+ * release lock
+ 
+ This ensures no concurrent reader can obtain a partially-written TID.
+ Note we don't need a tuple lock here.  Concurrent scans don't have to
+ worry about whether they got the old or new index tuple: if they get the
+ old one, the tighter values are okay from a correctness standpoint because
+ due to MVCC they can't possibly see the just-inserted heap tuples anyway.
+ 
+ 
+ For vacuuming, we need to figure out which index tuples are no longer
+ referenced from the reverse range map.  This requires some brute force,
+ but is simple:
+ 
+ 1) scan the complete index, store each existing TID in a dynahash.
+    Hash key is the TID, hash value is a boolean initially set to false.
+ 2) scan the complete revmap sequentially, read the TIDs on each page.  Share
+    lock on each page is sufficient.  For each TID so obtained, grab the
+    element from the hash and update the boolean to true.
+ 3) Scan the index again; for each tuple found, search the hash table.
+    If the tuple is not present in hash, it must have been added after our
+    initial scan; ignore it.  If tuple is present in hash, and the hash flag is
+    true, then the tuple is referenced from the revmap; ignore it.  If the hash
+    flag is false, then the index tuple is no longer referenced by the revmap;
+    but it could be about to be accessed by a concurrent scan.  Do
+    ConditionalLockTuple.  If this fails, ignore the tuple (it's in use), it
+    will be deleted by a future vacuum.  If lock is acquired, then we can safely
+    remove the index tuple.
+ 4) Index pages with free space can be detected by this second scan.  Register
+    those with the FSM.
+ 
+ Note this doesn't require scanning the heap at all, or being involved in
+ the heap's cleanup procedure.  Also, there is no need to LockBufferForCleanup,
+ which is a nice property because index scans keep pages pinned for long
+ periods.
+ 
+ 
+ 
+ Optimizer
+ ---------
+ 
+ In order to make this all work, the only thing we need to do is ensure we have a
+ good enough opclass and amcostestimate.  With this, the optimizer is able to pick
+ up the index on its own.
+ 
+ 
+ Open questions
+ --------------
+ 
+ * Same-size page ranges?
+   Current related literature seems to consider that each "index entry" in a
+   minmax index must cover the same number of pages.  There doesn't seem to be a
+   hard reason for this to be so; it might make sense to allow the index to
+   self-tune so that some index entries cover smaller page ranges, if this allows
+   the summary values to be more compact.  This would incur larger minmax
+   overhead for the index itself, but might allow better pruning of page ranges
+   during scan.  In the limit of one index tuple per page, the index itself would
+   occupy too much space, even though we would be able to skip reading most
+   heap pages, because the summary values are tight; in the opposite limit of
+   a single tuple that summarizes the whole table, we wouldn't be able to prune
+   anything even though the index is very small.  This can probably be made to work
+   by using the reverse range map as an index in itself.
+ 
+ * More compact representation for TIDBitmap?
+   TIDBitmap is the structure used to represent bitmap scans.  The
+   representation of lossy page ranges is not optimal for our purposes, because
+   it uses a Bitmapset to represent pages in the range; since we're going to return
+   all pages in a large range, it might be more convenient to allow for a
+   struct that uses start and end page numbers to represent the range, instead.
+ 
+ 
+ 
+ References:
+ 
+ Email thread on pgsql-hackers
+   http://www.postgresql.org/message-id/1199296574.7260.149.camel@ebony.site
+   From: Simon Riggs
+   To: pgsql-hackers
+   Subject: Dynamic Partitioning using Segment Visibility Map
+ 
+ http://wiki.postgresql.org/wiki/Segment_Exclusion
+ http://wiki.postgresql.org/wiki/Segment_Visibility_Map
+ 
*** a/src/backend/access/Makefile
--- b/src/backend/access/Makefile
***************
*** 8,13 **** subdir = src/backend/access
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
! SUBDIRS	    = common gin gist hash heap index nbtree rmgrdesc spgist transam
  
  include $(top_srcdir)/src/backend/common.mk
--- 8,13 ----
  top_builddir = ../../..
  include $(top_builddir)/src/Makefile.global
  
! SUBDIRS	    = common gin gist hash heap index minmax nbtree rmgrdesc spgist transam
  
  include $(top_srcdir)/src/backend/common.mk
*** a/src/backend/access/common/reloptions.c
--- b/src/backend/access/common/reloptions.c
***************
*** 209,214 **** static relopt_int intRelOpts[] =
--- 209,221 ----
  			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST
  		}, -1, 0, 2000000000
  	},
+ 	{
+ 		{
+ 			"pages_per_range",
+ 			"Number of pages that each page range covers in a Minmax index",
+ 			RELOPT_KIND_MINMAX
+ 		}, 128, 1, 131072
+ 	},
  
  	/* list terminator */
  	{{NULL}}
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 271,276 **** initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
--- 271,278 ----
  		scan->rs_startblock = 0;
  	}
  
+ 	scan->rs_initblock = 0;
+ 	scan->rs_numblocks = InvalidBlockNumber;
  	scan->rs_inited = false;
  	scan->rs_ctup.t_data = NULL;
  	ItemPointerSetInvalid(&scan->rs_ctup.t_self);
***************
*** 296,301 **** initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
--- 298,311 ----
  		pgstat_count_heap_scan(scan->rs_rd);
  }
  
+ /*
+  * heap_setscanlimits - restrict a heap scan to a subrange of the relation
+  *
+  * startBlk is the first page to scan; numBlks is the number of pages to
+  * visit.  A numBlks of InvalidBlockNumber means "no limit" (see the
+  * rs_numblocks checks in heapgettup / heapgettup_pagemode).
+  */
+ void
+ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
+ {
+ 	scan->rs_startblock = startBlk;
+ 	scan->rs_initblock = startBlk;
+ 	scan->rs_numblocks = numBlks;
+ }
+ 
  /*
   * heapgetpage - subroutine for heapgettup()
   *
***************
*** 636,642 **** heapgettup(HeapScanDesc scan,
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
--- 646,653 ----
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
***************
*** 646,652 **** heapgettup(HeapScanDesc scan,
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
--- 657,664 ----
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
***************
*** 897,903 **** heapgettup_pagemode(HeapScanDesc scan,
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
--- 909,916 ----
  		 */
  		if (backward)
  		{
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  			if (page == 0)
  				page = scan->rs_nblocks;
  			page--;
***************
*** 907,913 **** heapgettup_pagemode(HeapScanDesc scan,
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
--- 920,927 ----
  			page++;
  			if (page >= scan->rs_nblocks)
  				page = 0;
! 			finished = (page == scan->rs_startblock) ||
! 				(scan->rs_numblocks != InvalidBlockNumber ? --scan->rs_numblocks <= 0 : false);
  
  			/*
  			 * Report our new scan position for synchronization purposes. We
*** /dev/null
--- b/src/backend/access/minmax/Makefile
***************
*** 0 ****
--- 1,17 ----
+ #-------------------------------------------------------------------------
+ #
+ # Makefile--
+ #    Makefile for access/minmax
+ #
+ # IDENTIFICATION
+ #    src/backend/access/minmax/Makefile
+ #
+ #-------------------------------------------------------------------------
+ 
+ subdir = src/backend/access/minmax
+ top_builddir = ../../../..
+ include $(top_builddir)/src/Makefile.global
+ 
+ OBJS = minmax.o mmpageops.o mmrevmap.o mmtuple.o mmxlog.o mmsortable.o
+ 
+ include $(top_srcdir)/src/backend/common.mk
*** /dev/null
--- b/src/backend/access/minmax/minmax.c
***************
*** 0 ****
--- 1,942 ----
+ /*
+  * minmax.c
+  *		Implementation of Minmax indexes for Postgres
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/minmax.c
+  *
+  * TODO
+  *		* ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
+  *		* add support for unlogged indexes
+  *		* ditto expressional indexes
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_pageops.h"
+ #include "access/minmax_xlog.h"
+ #include "access/reloptions.h"
+ #include "access/relscan.h"
+ #include "catalog/index.h"
+ #include "miscadmin.h"
+ #include "pgstat.h"
+ #include "storage/bufmgr.h"
+ #include "storage/freespace.h"
+ #include "utils/rel.h"
+ 
+ 
+ /*
+  * We use a MMBuildState during initial construction of a Minmax index.
+  * The running state is kept in a DeformedMMTuple.
+  */
+ typedef struct MMBuildState
+ {
+ 	Relation	irel;
+ 	int			numtuples;
+ 	Buffer		currentInsertBuf;
+ 	BlockNumber pagesPerRange;
+ 	BlockNumber currRangeStart;
+ 	mmRevmapAccess *rmAccess;
+ 	MinmaxDesc *mmDesc;
+ 	bool		seentup;
+ 	bool		extended;
+ 	DeformedMMTuple *dtuple;
+ } MMBuildState;
+ 
+ /*
+  * Struct used as "opaque" during index scans
+  */
+ typedef struct MinmaxOpaque
+ {
+ 	BlockNumber		pagesPerRange;
+ 	mmRevmapAccess *rmAccess;
+ 	MinmaxDesc	   *mmDesc;
+ } MinmaxOpaque;
+ 
+ static MMBuildState *initialize_mm_buildstate(Relation idxRel,
+ 						 mmRevmapAccess *rmAccess, BlockNumber pagesPerRange);
+ static bool terminate_mm_buildstate(MMBuildState *state);
+ static void summarize_range(MMBuildState *mmstate, Relation heapRel,
+ 				BlockNumber heapBlk);
+ static void form_and_insert_tuple(MMBuildState *mmstate);
+ 
+ 
+ /*
+  * A tuple in the heap is being inserted.  To keep a minmax index up to date,
+  * we need to obtain the relevant index tuple, compare its stored values with
+  * those of the new tuple; if the tuple values are consistent with the summary
+  * tuple, there's nothing to do; otherwise we need to update the index.
+  *
+  * If the range is not currently summarized (i.e. the revmap returns InvalidTid
+  * for it), there's nothing to do either.
+  */
+ Datum
+ mminsert(PG_FUNCTION_ARGS)
+ {
+ 	Relation	idxRel = (Relation) PG_GETARG_POINTER(0);
+ 	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
+ 	bool	   *nulls = (bool *) PG_GETARG_POINTER(2);
+ 	ItemPointer heaptid = (ItemPointer) PG_GETARG_POINTER(3);
+ 
+ 	/* we ignore the rest of our arguments */
+ 	BlockNumber pagesPerRange;
+ 	MinmaxDesc *mmdesc;
+ 	mmRevmapAccess *rmAccess;
+ 	OffsetNumber off;
+ 	MMTuple    *mmtup;
+ 	DeformedMMTuple *dtup;
+ 	BlockNumber heapBlk;
+ 	Buffer		buf = InvalidBuffer;
+ 	int			keyno;
+ 	bool		need_insert = false;
+ 	/* must start false: it's only set by mm_doupdate, but read below even
+ 	 * when no update is attempted */
+ 	bool		extended = false;
+ 
+ 	rmAccess = mmRevmapAccessInit(idxRel, &pagesPerRange);
+ 
+ restart:
+ 	CHECK_FOR_INTERRUPTS();
+ 	heapBlk = ItemPointerGetBlockNumber(heaptid);
+ 	/* normalize the block number to be the first block in the range */
+ 	heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
+ 	mmtup = mmGetMMTupleForHeapBlock(rmAccess, heapBlk, &buf, &off,
+ 									 BUFFER_LOCK_SHARE);
+ 
+ 	if (!mmtup)
+ 	{
+ 		/* nothing to do, range is unsummarized */
+ 		mmRevmapAccessTerminate(rmAccess);
+ 		if (BufferIsValid(buf))
+ 			ReleaseBuffer(buf);
+ 		return BoolGetDatum(false);
+ 	}
+ 
+ 	/*
+ 	 * XXX after a restart these are rebuilt, leaking the previous copies
+ 	 * until the surrounding memory context is reset.
+ 	 */
+ 	mmdesc = minmax_build_mmdesc(idxRel);
+ 	dtup = minmax_deform_tuple(mmdesc, mmtup);
+ 
+ 	/*
+ 	 * Compare the key values of the new tuple to the stored index values; our
+ 	 * deformed tuple will get updated if the new tuple doesn't fit the
+ 	 * original range (note this means we can't break out of the loop early).
+ 	 * Make a note of whether this happens, so that we know to insert the
+ 	 * modified tuple later.
+ 	 */
+ 	for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 	{
+ 		Datum	result;
+ 		FmgrInfo   *addValue;
+ 
+ 		addValue = index_getprocinfo(idxRel, keyno + 1,
+ 									 MINMAX_PROCNUM_ADDVALUE);
+ 		result = FunctionCall5Coll(addValue,
+ 								   idxRel->rd_indcollation[keyno],
+ 								   PointerGetDatum(mmdesc),
+ 								   PointerGetDatum(dtup),
+ 								   UInt16GetDatum(keyno + 1),
+ 								   values[keyno],
+ 								   nulls[keyno]);
+ 		/* if that returned true, we need to insert the updated tuple */
+ 		need_insert |= DatumGetBool(result);
+ 	}
+ 
+ 	if (need_insert)
+ 	{
+ 		Page		page = BufferGetPage(buf);
+ 		ItemId		lp = PageGetItemId(page, off);
+ 		Size		origsz;
+ 		MMTuple	   *origtup;
+ 		Size		newsz;
+ 		MMTuple    *newtup;
+ 		bool		samepage;
+ 
+ 		/*
+ 		 * Make a copy of the old tuple, so that we can compare it after
+ 		 * re-acquiring the lock.
+ 		 */
+ 		origsz = ItemIdGetLength(lp);
+ 		origtup = minmax_copy_tuple(mmtup, origsz);
+ 
+ 		/*
+ 		 * Form the new tuple before the free-space check: its size must be
+ 		 * known to decide whether a same-page update is possible.
+ 		 */
+ 		newtup = minmax_form_tuple(mmdesc, heapBlk, dtup, &newsz);
+ 
+ 		/*
+ 		 * Before releasing the lock, check if we can do a same-page update:
+ 		 * either the new tuple is no larger than the old one, or the page
+ 		 * has enough free space to absorb the size increase.
+ 		 */
+ 		if (newsz <= origsz ||
+ 			PageGetExactFreeSpace(page) >= (newsz - origsz))
+ 			samepage = true;
+ 		else
+ 			samepage = false;
+ 
+ 		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 		/*
+ 		 * Try to update the tuple.  If this doesn't work for whatever reason,
+ 		 * we need to restart from the top; the revmap might be pointing at a
+ 		 * different tuple for this block now, so we need to recompute
+ 		 * to ensure both our new heap tuple and the other inserter's are
+ 		 * covered by the combined tuple.  It might be that we don't need to
+ 		 * update at all.
+ 		 */
+ 		if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, heapBlk, buf, off,
+ 						 origtup, origsz, newtup, newsz, samepage, &extended))
+ 			goto restart;
+ 	}
+ 	else
+ 		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 	ReleaseBuffer(buf);
+ 
+ 	mmRevmapAccessTerminate(rmAccess);
+ 	minmax_free_mmdesc(mmdesc);
+ 
+ 	/* tell the FSM about any page we may have added to the index */
+ 	if (extended)
+ 		FreeSpaceMapVacuum(idxRel);
+ 
+ 	return BoolGetDatum(false);
+ }
+ 
+ /*
+  * Initialize state for a Minmax index scan.
+  *
+  * We read the metapage here to determine the pages-per-range number that this
+  * index was built with.  Note that since this cannot be changed while we're
+  * holding lock on index, it's not necessary to recompute it during mmrescan.
+  */
+ Datum
+ mmbeginscan(PG_FUNCTION_ARGS)
+ {
+ 	Relation	r = (Relation) PG_GETARG_POINTER(0);
+ 	int			nkeys = PG_GETARG_INT32(1);
+ 	int			norderbys = PG_GETARG_INT32(2);
+ 	IndexScanDesc scan;
+ 	MinmaxOpaque *opaque;
+ 
+ 	scan = RelationGetIndexScan(r, nkeys, norderbys);
+ 
+ 	/*
+ 	 * Set up per-scan "opaque" state: revmap access (which also reads
+ 	 * pagesPerRange from the metapage) plus the index tuple descriptor.
+ 	 * Both are released by mmendscan.
+ 	 */
+ 	opaque = (MinmaxOpaque *) palloc(sizeof(MinmaxOpaque));
+ 	opaque->rmAccess = mmRevmapAccessInit(r, &opaque->pagesPerRange);
+ 	opaque->mmDesc = minmax_build_mmdesc(r);
+ 	scan->opaque = opaque;
+ 
+ 	PG_RETURN_POINTER(scan);
+ }
+ 
+ /*
+  * Execute the index scan.
+  *
+  * This works by reading index TIDs from the revmap, and obtaining the index
+  * tuples pointed to by them; the summary values in the index tuples are
+  * compared to the scan keys.  We return into the TID bitmap all the pages in
+  * ranges corresponding to index tuples that match the scan keys.
+  *
+  * If a TID from the revmap is read as InvalidTID, we know that range is
+  * unsummarized.  Pages in those ranges need to be returned regardless of scan
+  * keys.
+  *
+  * XXX see _bt_first on what to do about sk_subtype.
+  */
+ Datum
+ mmgetbitmap(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 	TIDBitmap  *tbm = (TIDBitmap *) PG_GETARG_POINTER(1);
+ 	Relation	idxRel = scan->indexRelation;
+ 	Buffer		buf = InvalidBuffer;
+ 	MinmaxDesc *mmdesc;
+ 	Oid			heapOid;
+ 	Relation	heapRel;
+ 	MinmaxOpaque *opaque;
+ 	BlockNumber nblocks;
+ 	BlockNumber heapBlk;
+ 	int			totalpages = 0;
+ 	int			keyno;
+ 	FmgrInfo   *consistentFn;
+ 
+ 	opaque = (MinmaxOpaque *) scan->opaque;
+ 	mmdesc = opaque->mmDesc;
+ 	pgstat_count_index_scan(idxRel);
+ 
+ 	/*
+ 	 * XXX We need to know the size of the table so that we know how long to
+ 	 * iterate on the revmap.  There's room for improvement here, in that we
+ 	 * could have the revmap tell us when to stop iterating.
+ 	 */
+ 	heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
+ 	heapRel = heap_open(heapOid, AccessShareLock);
+ 	nblocks = RelationGetNumberOfBlocks(heapRel);
+ 	heap_close(heapRel, AccessShareLock);
+ 
+ 	/*
+ 	 * Obtain consistent functions for all indexed column.  Maybe it'd be
+ 	 * possible to do this lazily only the first time we see a scan key that
+ 	 * involves each particular attribute.
+ 	 */
+ 	consistentFn = palloc(sizeof(FmgrInfo) * mmdesc->md_tupdesc->natts);
+ 	for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 	{
+ 		FmgrInfo   *tmp;
+ 
+ 		tmp = index_getprocinfo(idxRel, keyno + 1, MINMAX_PROCNUM_CONSISTENT);
+ 		fmgr_info_copy(&consistentFn[keyno], tmp, CurrentMemoryContext);
+ 	}
+ 
+ 	/*
+ 	 * Now scan the revmap.  We start by querying for heap page 0,
+ 	 * incrementing by the number of pages per range; this gives us a full
+ 	 * view of the table.
+ 	 */
+ 	for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->pagesPerRange)
+ 	{
+ 		bool		addrange;
+ 		OffsetNumber off;
+ 		MMTuple	   *tup;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		/*
+ 		 * Fetch the index tuple for this range; on success, "buf" is left
+ 		 * pinned and share-locked, so we must unlock it once done with the
+ 		 * tuple (the pin is reused across loop iterations).
+ 		 */
+ 		tup = mmGetMMTupleForHeapBlock(opaque->rmAccess, heapBlk, &buf, &off,
+ 									   BUFFER_LOCK_SHARE);
+ 		/*
+ 		 * For page ranges with no indexed tuple, we must return the whole
+ 		 * range; otherwise, compare it to the scan keys.
+ 		 */
+ 		if (tup == NULL)
+ 		{
+ 			addrange = true;
+ 		}
+ 		else
+ 		{
+ 			DeformedMMTuple *dtup;
+ 			int			keyno;
+ 
+ 			dtup = minmax_deform_tuple(mmdesc, tup);
+ 
+ 			/*
+ 			 * Compare scan keys with summary values stored for the range.  If
+ 			 * scan keys are matched, the page range must be added to the
+ 			 * bitmap.  We initially assume the range needs to be added; in
+ 			 * particular this serves the case where there are no keys.
+ 			 */
+ 			addrange = true;
+ 			for (keyno = 0; keyno < scan->numberOfKeys; keyno++)
+ 			{
+ 				ScanKey		key = &scan->keyData[keyno];
+ 				AttrNumber	keyattno = key->sk_attno;
+ 				Datum		add;
+ 
+ 				/*
+ 				 * The collation of the scan key must match the collation used
+ 				 * in the index column.  Otherwise we shouldn't be using this
+ 				 * index ...
+ 				 */
+ 				Assert(key->sk_collation ==
+ 					   mmdesc->md_tupdesc->attrs[keyattno - 1]->attcollation);
+ 
+ 				/*
+ 				 * Check whether the scan key is consistent with the page range
+ 				 * values; if so, have the pages in the range added to the
+ 				 * output bitmap.
+ 				 *
+ 				 * When there are multiple scan keys, failure to meet the
+ 				 * criteria for a single one of them is enough to discard the
+ 				 * range as a whole, so break out of the loop as soon as a
+ 				 * false return value is obtained.
+ 				 */
+ 				add = FunctionCall3Coll(&consistentFn[keyattno - 1],
+ 										key->sk_collation,
+ 										PointerGetDatum(mmdesc),
+ 										PointerGetDatum(dtup),
+ 										PointerGetDatum(key));
+ 				addrange = DatumGetBool(add);
+ 				if (!addrange)
+ 					break;
+ 			}
+ 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 			pfree(dtup);
+ 		}
+ 
+ 		/*
+ 		 * add the pages in the range to the output bitmap, if needed.
+ 		 * NOTE(review): the last range may extend past the end of the table,
+ 		 * in which case pages beyond nblocks are added too -- presumably
+ 		 * harmless for a bitmap heap scan, but worth confirming.
+ 		 */
+ 		if (addrange)
+ 		{
+ 			BlockNumber pageno;
+ 
+ 			for (pageno = heapBlk;
+ 				 pageno <= heapBlk + opaque->pagesPerRange - 1;
+ 				 pageno++)
+ 			{
+ 				tbm_add_page(tbm, pageno);
+ 				totalpages++;
+ 			}
+ 		}
+ 	}
+ 
+ 	if (buf != InvalidBuffer)
+ 		ReleaseBuffer(buf);
+ 
+ 	/*
+ 	 * XXX We have an approximation of the number of *pages* that our scan
+ 	 * returns, but we don't have a precise idea of the number of heap tuples
+ 	 * involved.
+ 	 */
+ 	PG_RETURN_INT64(totalpages * 10);
+ }
+ 
+ /*
+  * Re-initialize state for a minmax index scan
+  */
+ Datum
+ mmrescan(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
+ 	/* other arguments ignored */
+ 
+ 	/*
+ 	 * Copy the caller's new scan keys into place.  No other state needs
+ 	 * resetting: the revmap access and tuple descriptor set up by
+ 	 * mmbeginscan remain usable (see the comment there about pagesPerRange
+ 	 * not changing while we hold lock on the index).
+ 	 */
+ 	if (scankey && scan->numberOfKeys > 0)
+ 		memmove(scan->keyData, scankey,
+ 				scan->numberOfKeys * sizeof(ScanKeyData));
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ /*
+  * Close down a minmax index scan
+  */
+ Datum
+ mmendscan(PG_FUNCTION_ARGS)
+ {
+ 	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
+ 	MinmaxOpaque *opaque = (MinmaxOpaque *) scan->opaque;
+ 
+ 	/* release the resources acquired by mmbeginscan */
+ 	mmRevmapAccessTerminate(opaque->rmAccess);
+ 	minmax_free_mmdesc(opaque->mmDesc);
+ 	pfree(opaque);
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ Datum
+ mmmarkpos(PG_FUNCTION_ARGS)
+ {
+ 	/* should not be reached: minmax provides only bitmap scans, which
+ 	 * never use mark/restore */
+ 	elog(ERROR, "MinMax does not support mark/restore");
+ 	PG_RETURN_VOID();
+ }
+ 
+ Datum
+ mmrestrpos(PG_FUNCTION_ARGS)
+ {
+ 	/* see mmmarkpos: mark/restore is never used with this AM */
+ 	elog(ERROR, "MinMax does not support mark/restore");
+ 	PG_RETURN_VOID();
+ }
+ 
+ /*
+  * Per-heap-tuple callback for IndexBuildHeapScan.
+  *
+  * Note we don't worry about the page range at the end of the table here; it is
+  * present in the build state struct after we're called the last time, but not
+  * inserted into the index.  Caller must ensure to do so, if appropriate.
+  */
+ static void
+ mmbuildCallback(Relation index,
+ 				HeapTuple htup,
+ 				Datum *values,
+ 				bool *isnull,
+ 				bool tupleIsAlive,
+ 				void *state)
+ {
+ 	MMBuildState *mmstate = (MMBuildState *) state;
+ 	BlockNumber thisblock;
+ 	int			i;
+ 
+ 	thisblock = ItemPointerGetBlockNumber(&htup->t_self);
+ 
+ 	/*
+ 	 * If we're in a block that belongs to a future range, summarize what
+ 	 * we've got and start afresh.  Note the scan may have skipped one or
+ 	 * more whole ranges (pages devoid of live tuples yield no callbacks),
+ 	 * so loop until currRangeStart lands in the range containing thisblock;
+ 	 * a plain "if" would advance only one range and leave the state
+ 	 * attributed to the wrong range.
+ 	 */
+ 	while (thisblock > (mmstate->currRangeStart + mmstate->pagesPerRange - 1))
+ 	{
+ 
+ 		MINMAX_elog(DEBUG2, "mmbuildCallback: completed a range: %u--%u",
+ 					mmstate->currRangeStart,
+ 					mmstate->currRangeStart + mmstate->pagesPerRange);
+ 
+ 		/* create the index tuple and insert it */
+ 		form_and_insert_tuple(mmstate);
+ 
+ 		/* set state to correspond to the next range */
+ 		mmstate->currRangeStart += mmstate->pagesPerRange;
+ 
+ 		/* re-initialize state for it */
+ 		minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
+ 	}
+ 
+ 	/* Accumulate the current tuple into the running state */
+ 	mmstate->seentup = true;
+ 	for (i = 0; i < mmstate->mmDesc->md_tupdesc->natts; i++)
+ 	{
+ 		FmgrInfo   *addValue;
+ 
+ 		addValue = index_getprocinfo(index, i + 1,
+ 									 MINMAX_PROCNUM_ADDVALUE);
+ 
+ 		/*
+ 		 * Update dtuple state, if and as necessary.
+ 		 */
+ 		FunctionCall5Coll(addValue,
+ 						  mmstate->mmDesc->md_tupdesc->attrs[i]->attcollation,
+ 						  PointerGetDatum(mmstate->mmDesc),
+ 						  PointerGetDatum(mmstate->dtuple),
+ 						  UInt16GetDatum(i + 1), values[i], isnull[i]);
+ 	}
+ }
+ 
+ /*
+  * mmbuild() -- build a new minmax index.
+  */
+ Datum
+ mmbuild(PG_FUNCTION_ARGS)
+ {
+ 	Relation	heap = (Relation) PG_GETARG_POINTER(0);
+ 	Relation	index = (Relation) PG_GETARG_POINTER(1);
+ 	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
+ 	IndexBuildResult *result;
+ 	double		reltuples;
+ 	double		idxtuples;
+ 	mmRevmapAccess *rmAccess;
+ 	MMBuildState *mmstate;
+ 	Buffer		meta;
+ 	BlockNumber pagesPerRange;
+ 
+ 	/*
+ 	 * We expect to be called exactly once for any index relation.
+ 	 */
+ 	if (RelationGetNumberOfBlocks(index) != 0)
+ 		elog(ERROR, "index \"%s\" already contains data",
+ 			 RelationGetRelationName(index));
+ 
+ 	/* partial indexes not supported */
+ 	if (indexInfo->ii_Predicate != NIL)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("partial indexes not supported")));
+ 	/* expressions not supported (yet?) */
+ 	if (indexInfo->ii_Expressions != NIL)
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 				 errmsg("expression indexes not supported")));
+ 
+ 	/*
+ 	 * Critical section not required, because on error the creation of the
+ 	 * whole relation will be rolled back.
+ 	 */
+ 
+ 	/* the index is empty, so P_NEW must yield block 0: the metapage */
+ 	meta = ReadBuffer(index, P_NEW);
+ 	Assert(BufferGetBlockNumber(meta) == MINMAX_METAPAGE_BLKNO);
+ 	LockBuffer(meta, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 	mm_metapage_init(BufferGetPage(meta), MinmaxGetPagesPerRange(index),
+ 					 MINMAX_CURRENT_VERSION);
+ 	MarkBufferDirty(meta);
+ 
+ 	/* WAL-log the metapage creation so recovery can replay it */
+ 	if (RelationNeedsWAL(index))
+ 	{
+ 		xl_minmax_createidx xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata;
+ 		Page		page;
+ 
+ 		xlrec.node = index->rd_node;
+ 		xlrec.version = MINMAX_CURRENT_VERSION;
+ 		xlrec.pagesPerRange = MinmaxGetPagesPerRange(index);
+ 
+ 		rdata.buffer = InvalidBuffer;
+ 		rdata.data = (char *) &xlrec;
+ 		rdata.len = SizeOfMinmaxCreateIdx;
+ 		rdata.next = NULL;
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_CREATE_INDEX, &rdata);
+ 
+ 		page = BufferGetPage(meta);
+ 		PageSetLSN(page, recptr);
+ 	}
+ 
+ 	UnlockReleaseBuffer(meta);
+ 
+ 	/*
+ 	 * Initialize our state, including the deformed tuple state.
+ 	 */
+ 	rmAccess = mmRevmapAccessInit(index, &pagesPerRange);
+ 	mmstate = initialize_mm_buildstate(index, rmAccess, pagesPerRange);
+ 
+ 	/*
+ 	 * Now scan the relation.  No syncscan allowed here because we want the
+ 	 * heap blocks in physical order.
+ 	 */
+ 	reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
+ 								   mmbuildCallback, (void *) mmstate);
+ 
+ 	/* process the final batch (mmbuildCallback leaves the last range
+ 	 * accumulated but not inserted) */
+ 	form_and_insert_tuple(mmstate);
+ 
+ 	/*
+ 	 * release resources; terminate_mm_buildstate presumably reports whether
+ 	 * the index was physically extended, in which case we vacuum the FSM.
+ 	 */
+ 	idxtuples = mmstate->numtuples;
+ 	mmRevmapAccessTerminate(mmstate->rmAccess);
+ 	if (terminate_mm_buildstate(mmstate))
+ 		FreeSpaceMapVacuum(index);
+ 
+ 	/*
+ 	 * Return statistics
+ 	 */
+ 	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+ 
+ 	result->heap_tuples = reltuples;
+ 	result->index_tuples = idxtuples;
+ 
+ 	PG_RETURN_POINTER(result);
+ }
+ 
+ Datum
+ mmbuildempty(PG_FUNCTION_ARGS)
+ {
+ 	/* unlogged-index support is listed as a TODO at the top of this file */
+ 	ereport(ERROR,
+ 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 			 errmsg("unlogged MinMax indexes are not supported")));
+ 
+ 	PG_RETURN_VOID();
+ }
+ 
+ /*
+  * mmbulkdelete
+  *		Since there are no per-heap-tuple index tuples in minmax indexes,
+  *		there's not a lot we can do here.
+  *
+  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
+  * tuple is deleted), meaning the need to re-run summarization on the affected
+  * range.  We'd need an extra flag in mmtuples for that.
+  */
+ Datum
+ mmbulkdelete(PG_FUNCTION_ARGS)
+ {
+ 	/* other arguments are not currently used */
+ 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ 
+ 	/* allocate stats if first time through, else re-use existing struct */
+ 	if (stats == NULL)
+ 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ 
+ 	/* no per-heap-tuple entries exist, so there is nothing to delete here */
+ 	PG_RETURN_POINTER(stats);
+ }
+ 
+ /*
+  * This routine is in charge of "vacuuming" a minmax index: we just summarize
+  * ranges that are currently unsummarized.
+  */
+ Datum
+ mmvacuumcleanup(PG_FUNCTION_ARGS)
+ {
+ 	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+ 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ 	mmRevmapAccess *rmAccess;
+ 	MMBuildState *mmstate = NULL;
+ 	Relation	heapRel;
+ 	BlockNumber	heapNumBlocks;
+ 	BlockNumber heapBlk;
+ 	BlockNumber pagesPerRange;
+ 	Buffer		buf;
+ 
+ 	/* No-op in ANALYZE ONLY mode */
+ 	if (info->analyze_only)
+ 		PG_RETURN_POINTER(stats);
+ 
+ 	/*
+ 	 * If mmbulkdelete wasn't called, stats is NULL; allocate it here so we
+ 	 * always return something, mirroring mmbulkdelete's behavior.
+ 	 */
+ 	if (stats == NULL)
+ 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ 	stats->num_pages = RelationGetNumberOfBlocks(info->index);
+ 
+ 	heapRel = heap_open(IndexGetRelation(RelationGetRelid(info->index), false),
+ 						AccessShareLock);
+ 
+ 	rmAccess = mmRevmapAccessInit(info->index, &pagesPerRange);
+ 
+ 	/*
+ 	 * Scan the revmap to find unsummarized items.
+ 	 */
+ 	buf = InvalidBuffer;
+ 	heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
+ 	for (heapBlk = 0; heapBlk < heapNumBlocks; heapBlk += pagesPerRange)
+ 	{
+ 		MMTuple	   *tup;
+ 		OffsetNumber off;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		tup = mmGetMMTupleForHeapBlock(rmAccess, heapBlk, &buf, &off,
+ 									   BUFFER_LOCK_SHARE);
+ 		if (tup == NULL)
+ 		{
+ 			/*
+ 			 * No revmap entry for this heap range.  Summarize it; the build
+ 			 * state is created lazily, on the first unsummarized range seen.
+ 			 */
+ 			if (mmstate == NULL)
+ 				mmstate = initialize_mm_buildstate(info->index, rmAccess,
+ 												   pagesPerRange);
+ 			summarize_range(mmstate, heapRel, heapBlk);
+ 		}
+ 		else
+ 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 	}
+ 
+ 	if (BufferIsValid(buf))
+ 		ReleaseBuffer(buf);
+ 
+ 	/* free resources; vacuum the FSM if summarization extended the index */
+ 	mmRevmapAccessTerminate(rmAccess);
+ 	if (mmstate && terminate_mm_buildstate(mmstate))
+ 		FreeSpaceMapVacuum(info->index);
+ 
+ 	heap_close(heapRel, AccessShareLock);
+ 
+ 	PG_RETURN_POINTER(stats);
+ }
+ 
+ /*
+  * Reloptions processor for minmax indexes.
+  *
+  * Returns NULL when no option is set (the defaults apply); otherwise a
+  * filled-in MinmaxOptions struct.
+  */
+ Datum
+ mmoptions(PG_FUNCTION_ARGS)
+ {
+ 	Datum		reloptions = PG_GETARG_DATUM(0);
+ 	bool		validate = PG_GETARG_BOOL(1);
+ 	static const relopt_parse_elt tab[] = {
+ 		{"pages_per_range", RELOPT_TYPE_INT, offsetof(MinmaxOptions, pagesPerRange)}
+ 	};
+ 	relopt_value *parsed;
+ 	MinmaxOptions *mmopts;
+ 	int			nparsed;
+ 
+ 	parsed = parseRelOptions(reloptions, validate, RELOPT_KIND_MINMAX,
+ 							 &nparsed);
+ 
+ 	/* nothing set?  then there's no options struct to build */
+ 	if (nparsed == 0)
+ 		PG_RETURN_NULL();
+ 
+ 	/* build a MinmaxOptions struct from the parsed values */
+ 	mmopts = allocateReloptStruct(sizeof(MinmaxOptions), parsed, nparsed);
+ 	fillRelOptions((void *) mmopts, sizeof(MinmaxOptions), parsed, nparsed,
+ 				   validate, tab, lengthof(tab));
+ 
+ 	pfree(parsed);
+ 
+ 	PG_RETURN_BYTEA_P(mmopts);
+ }
+ 
+ /*
+  * Set up a fresh minmax index page of the given type.
+  *
+  * The caller is responsible for marking the buffer dirty, as appropriate;
+  * no WAL logging is done here.
+  */
+ void
+ mm_page_init(Page page, uint16 type)
+ {
+ 	/* initialize the page, reserving room for our special space ... */
+ 	PageInit(page, BLCKSZ, sizeof(MinmaxSpecialSpace));
+ 
+ 	/* ... and record the page type there */
+ 	((MinmaxSpecialSpace *) PageGetSpecialPointer(page))->type = type;
+ }
+ 
+ 
+ /*
+  * Fill in the metapage of a new minmax index: page type, magic number,
+  * format version and pages-per-range setting.
+  */
+ void
+ mm_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
+ {
+ 	MinmaxMetaPageData *metadata;
+ 
+ 	mm_page_init(page, MINMAX_PAGETYPE_META);
+ 
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(page);
+ 	metadata->minmaxMagic = MINMAX_META_MAGIC;
+ 	metadata->minmaxVersion = version;
+ 	metadata->pagesPerRange = pagesPerRange;
+ 
+ 	/*
+ 	 * Cheat a little here: block 0 can never really hold revmap data, since
+ 	 * it is this very metapage; but starting the counter at 0 allows the
+ 	 * first revmap page to be created together with the index itself.
+ 	 */
+ 	metadata->lastRevmapPage = 0;
+ }
+ 
+ /*
+  * Build a MinmaxDesc used to create or scan a minmax index.
+  *
+  * The descriptor references the relation's tuple descriptor (with its
+  * refcount bumped) and one opclass-provided MinmaxOpcInfo per indexed
+  * column; release it with minmax_free_mmdesc, which undoes both.
+  */
+ MinmaxDesc *
+ minmax_build_mmdesc(Relation rel)
+ {
+ 	MinmaxOpcInfo **opcinfo;
+ 	MinmaxDesc *mmdesc;
+ 	TupleDesc	tupdesc;
+ 	int			totalstored = 0;
+ 	int			keyno;
+ 	long		totalsize;
+ 
+ 	tupdesc = RelationGetDescr(rel);
+ 	/* matched by DecrTupleDescRefCount in minmax_free_mmdesc */
+ 	IncrTupleDescRefCount(tupdesc);
+ 
+ 	/*
+ 	 * Obtain MinmaxOpcInfo for each indexed column.  While at it, accumulate
+ 	 * the number of columns stored, since the number is opclass-defined.
+ 	 */
+ 	opcinfo = (MinmaxOpcInfo **) palloc(sizeof(MinmaxOpcInfo *) * tupdesc->natts);
+ 	for (keyno = 0; keyno < tupdesc->natts; keyno++)
+ 	{
+ 		FmgrInfo *opcInfoFn;
+ 
+ 		opcInfoFn = index_getprocinfo(rel, keyno + 1, MINMAX_PROCNUM_OPCINFO);
+ 
+ 		/* actually FunctionCall0 but we don't have that; the arg is ignored */
+ 		opcinfo[keyno] = (MinmaxOpcInfo *)
+ 			DatumGetPointer(FunctionCall1(opcInfoFn, InvalidOid));
+ 		totalstored += opcinfo[keyno]->oi_nstored;
+ 	}
+ 
+ 	/* Allocate our result struct and fill it in */
+ 	totalsize = offsetof(MinmaxDesc, md_info) +
+ 		sizeof(MinmaxOpcInfo *) * tupdesc->natts;
+ 
+ 	mmdesc = palloc(totalsize);
+ 	mmdesc->md_index = rel;
+ 	mmdesc->md_tupdesc = tupdesc;
+ 	mmdesc->md_disktdesc = NULL;	/* generated lazily */
+ 	mmdesc->md_totalstored = totalstored;
+ 
+ 	/* transfer the per-column opclass info into the result struct */
+ 	for (keyno = 0; keyno < tupdesc->natts; keyno++)
+ 		mmdesc->md_info[keyno] = opcinfo[keyno];
+ 	pfree(opcinfo);
+ 
+ 	return mmdesc;
+ }
+ 
+ /*
+  * Release a MinmaxDesc created by minmax_build_mmdesc, including the
+  * per-column opclass info and the tuple descriptor reference it holds.
+  */
+ void
+ minmax_free_mmdesc(MinmaxDesc *mmdesc)
+ {
+ 	int		i;
+ 
+ 	for (i = 0; i < mmdesc->md_tupdesc->natts; i++)
+ 		pfree(mmdesc->md_info[i]);
+ 	DecrTupleDescRefCount(mmdesc->md_tupdesc);
+ 	pfree(mmdesc);
+ }
+ 
+ /*
+  * Create and initialize a MMBuildState for inserting summary tuples into
+  * the given index, through the given revmap access object.
+  */
+ static MMBuildState *
+ initialize_mm_buildstate(Relation idxRel, mmRevmapAccess *rmAccess,
+ 						 BlockNumber pagesPerRange)
+ {
+ 	MMBuildState *state = palloc(sizeof(MMBuildState));
+ 
+ 	state->irel = idxRel;
+ 	state->numtuples = 0;
+ 	state->currentInsertBuf = InvalidBuffer;
+ 	state->pagesPerRange = pagesPerRange;
+ 	state->currRangeStart = 0;
+ 	state->rmAccess = rmAccess;
+ 	state->mmDesc = minmax_build_mmdesc(idxRel);
+ 	state->seentup = false;
+ 	state->extended = false;
+ 
+ 	/* set up an empty in-memory tuple, ready to accumulate values */
+ 	state->dtuple = minmax_new_dtuple(state->mmDesc);
+ 	minmax_dtuple_initialize(state->dtuple, state->mmDesc);
+ 
+ 	return state;
+ }
+ 
+ /*
+  * Release resources associated with a MMBuildState.  Returns whether the FSM
+  * should be vacuumed afterwards, i.e. whether the index was extended while
+  * this state was in use.
+  */
+ static bool
+ terminate_mm_buildstate(MMBuildState *mmstate)
+ {
+ 	bool	vacuumfsm;
+ 
+ 	/*
+ 	 * Release the last index buffer used, first recording its free space so
+ 	 * later insertions can find it.  (Use BufferIsValid, as everywhere else
+ 	 * in this code, rather than !BufferIsInvalid.)
+ 	 */
+ 	if (BufferIsValid(mmstate->currentInsertBuf))
+ 	{
+ 		Page	page;
+ 
+ 		page = BufferGetPage(mmstate->currentInsertBuf);
+ 		RecordPageWithFreeSpace(mmstate->irel,
+ 								BufferGetBlockNumber(mmstate->currentInsertBuf),
+ 								PageGetFreeSpace(page));
+ 		ReleaseBuffer(mmstate->currentInsertBuf);
+ 	}
+ 	vacuumfsm = mmstate->extended;
+ 
+ 	minmax_free_mmdesc(mmstate->mmDesc);
+ 	pfree(mmstate->dtuple);
+ 	pfree(mmstate);
+ 
+ 	return vacuumfsm;
+ }
+ 
+ /*
+  * Summarize the given page range of the given index.
+  *
+  * The range starts at heapBlk and spans mmstate->pagesPerRange heap pages.
+  * On return, the build state's deformed tuple has been re-initialized,
+  * ready for the next range.
+  *
+  * NOTE(review): BuildIndexInfo is re-run for every range summarized; when
+  * called in a loop (as from mmvacuumcleanup) it might be preferable to
+  * build it once -- confirm nothing requires a fresh IndexInfo per range.
+  */
+ static void
+ summarize_range(MMBuildState *mmstate, Relation heapRel, BlockNumber heapBlk)
+ {
+ 	IndexInfo  *indexInfo;
+ 
+ 	indexInfo = BuildIndexInfo(mmstate->irel);
+ 
+ 	/* tell the build callback machinery which range we're accumulating */
+ 	mmstate->currRangeStart = heapBlk;
+ 
+ 	/*
+ 	 * Execute the partial heap scan covering the heap blocks in the
+ 	 * specified page range, summarizing the heap tuples in it.  This scan
+ 	 * stops just short of mmbuildCallback creating the new index entry.
+ 	 */
+ 	IndexBuildHeapRangeScan(heapRel, mmstate->irel, indexInfo, false,
+ 							heapBlk, mmstate->pagesPerRange,
+ 							mmbuildCallback, (void *) mmstate);
+ 
+ 	/*
+ 	 * Create the index tuple and insert it.  Note mmbuildCallback didn't
+ 	 * have the chance to actually insert anything into the index, because
+ 	 * the heapscan should have ended just as it reached the final tuple in
+ 	 * the range.
+ 	 */
+ 	form_and_insert_tuple(mmstate);
+ 
+ 	/* and re-initialize state for the next range */
+ 	minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
+ }
+ 
+ /*
+  * Convert the deformed tuple accumulated in the build state into on-disk
+  * format and insert it into the index, pointing the revmap at it.
+  *
+  * This is a no-op if no heap tuple has been seen for the current range.
+  */
+ static void
+ form_and_insert_tuple(MMBuildState *mmstate)
+ {
+ 	Size		len;
+ 	MMTuple    *mmtup;
+ 
+ 	if (!mmstate->seentup)
+ 		return;
+ 
+ 	mmtup = minmax_form_tuple(mmstate->mmDesc, mmstate->currRangeStart,
+ 							  mmstate->dtuple, &len);
+ 	mm_doinsert(mmstate->irel, mmstate->pagesPerRange, mmstate->rmAccess,
+ 				&mmstate->currentInsertBuf, mmstate->currRangeStart,
+ 				mmtup, len, &mmstate->extended);
+ 	mmstate->numtuples++;
+ 	pfree(mmtup);
+ 
+ 	/* begin accumulating a fresh range */
+ 	mmstate->seentup = false;
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmpageops.c
***************
*** 0 ****
--- 1,638 ----
+ /*
+  * mmpageops.c
+  *		Page-handling routines for Minmax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmpageops.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax_pageops.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_xlog.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/freespace.h"
+ #include "storage/lmgr.h"
+ #include "storage/smgr.h"
+ #include "utils/rel.h"
+ 
+ 
+ static Buffer mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+ 				   bool *was_extended);
+ static Size mm_page_get_freespace(Page page);
+ 
+ 
+ /*
+  * Update tuple origtup (size origsz), located in offset oldoff of buffer
+  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
+  * at heapBlk.  If samepage is true, then attempt to put the new tuple in the
+  * same page, otherwise use some other one.
+  *
+  * If the update is done, return true; the revmap is updated to point to the
+  * new tuple.  If the update is not done for whatever reason, return false.
+  * Caller may retry the update if this happens.
+  *
+  * If the index had to be extended in the course of this operation, *extended
+  * is set to true.
+  */
+ bool
+ mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 			Buffer oldbuf, OffsetNumber oldoff,
+ 			const MMTuple *origtup, Size origsz,
+ 			const MMTuple *newtup, Size newsz,
+ 			bool samepage, bool *extended)
+ {
+ 	Page		oldpage;
+ 	ItemId		origlp;
+ 	MMTuple	   *oldtup;
+ 	Size		oldsz;
+ 	Buffer		newbuf;
+ 	MinmaxSpecialSpace *special;
+ 
+ 	if (!samepage)
+ 	{
+ 		/* need a page on which to put the item */
+ 		newbuf = mm_getinsertbuffer(idxrel, oldbuf, newsz, extended);
+ 		if (!BufferIsValid(newbuf))
+ 			return false;
+ 
+ 		/*
+ 		 * Note: it's possible (though unlikely) that the returned newbuf is
+ 		 * the same as oldbuf, if mm_getinsertbuffer determined that the old
+ 		 * buffer does in fact have enough space.
+ 		 */
+ 		if (newbuf == oldbuf)
+ 			newbuf = InvalidBuffer;
+ 	}
+ 	else
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 		newbuf = InvalidBuffer;
+ 	}
+ 	oldpage = BufferGetPage(oldbuf);
+ 	origlp = PageGetItemId(oldpage, oldoff);
+ 
+ 	/* Check that the old tuple wasn't updated concurrently */
+ 	if (!ItemIdIsNormal(origlp))
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 
+ 		/*
+ 		 * If we obtained a target buffer for the new tuple above, we must
+ 		 * not leak its lock and pin on this early exit.
+ 		 */
+ 		if (BufferIsValid(newbuf))
+ 			UnlockReleaseBuffer(newbuf);
+ 		return false;
+ 	}
+ 
+ 	oldsz = ItemIdGetLength(origlp);
+ 	oldtup = (MMTuple *) PageGetItem(oldpage, origlp);
+ 
+ 	/*
+ 	 * If both tuples are identical, there is nothing to do; except that if we
+ 	 * were requested to move the tuple across pages, we do it even if they are
+ 	 * equal.  (newbuf is necessarily invalid here, since samepage is true.)
+ 	 */
+ 	if (samepage && minmax_tuples_equal(oldtup, oldsz, origtup, origsz))
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return false;
+ 	}
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(oldpage);
+ 
+ 	/*
+ 	 * Great, the old tuple is intact.  We can proceed with the update.
+ 	 *
+ 	 * If there's enough room on the old page for the new tuple, replace it.
+ 	 *
+ 	 * Note that there might now be enough space on the page even though
+ 	 * the caller told us there isn't, if a concurrent update moved a tuple
+ 	 * elsewhere or replaced a tuple with a smaller one.
+ 	 */
+ 	if ((special->flags & MINMAX_EVACUATE_PAGE) == 0 &&
+ 		(newsz <= origsz || PageGetExactFreeSpace(oldpage) >= (origsz - newsz)))
+ 	{
+ 		if (BufferIsValid(newbuf))
+ 			UnlockReleaseBuffer(newbuf);
+ 
+ 		START_CRIT_SECTION();
+ 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
+ 		if (PageAddItem(oldpage, (Item) newtup, newsz, oldoff, true, false) == InvalidOffsetNumber)
+ 			elog(ERROR, "failed to add mmtuple");
+ 		MarkBufferDirty(oldbuf);
+ 
+ 		/* XLOG stuff */
+ 		if (RelationNeedsWAL(idxrel))
+ 		{
+ 			BlockNumber blk = BufferGetBlockNumber(oldbuf);
+ 			xl_minmax_samepage_update xlrec;
+ 			XLogRecPtr	recptr;
+ 			XLogRecData	rdata[2];
+ 			uint8		info = XLOG_MINMAX_SAMEPAGE_UPDATE;
+ 
+ 			xlrec.node = idxrel->rd_node;
+ 			ItemPointerSetBlockNumber(&xlrec.tid, blk);
+ 			ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
+ 			rdata[0].data = (char *) &xlrec;
+ 			rdata[0].len = SizeOfMinmaxSamepageUpdate;
+ 			rdata[0].buffer = InvalidBuffer;
+ 			rdata[0].next = &(rdata[1]);
+ 
+ 			rdata[1].data = (char *) newtup;
+ 			rdata[1].len = newsz;
+ 			rdata[1].buffer = oldbuf;
+ 			rdata[1].buffer_std = true;
+ 			rdata[1].next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 			PageSetLSN(oldpage, recptr);
+ 		}
+ 
+ 		END_CRIT_SECTION();
+ 
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return true;
+ 	}
+ 	else if (newbuf == InvalidBuffer)
+ 	{
+ 		/*
+ 		 * Not enough space, but caller said that there was. Tell them to
+ 		 * start over.
+ 		 */
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return false;
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Not enough free space on the oldpage. Put the new tuple on the
+ 		 * new page, and update the revmap.
+ 		 */
+ 		Page		newpage = BufferGetPage(newbuf);
+ 		Buffer		revmapbuf;
+ 		ItemPointerData newtid;
+ 		OffsetNumber newoff;
+ 
+ 		revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
+ 
+ 		START_CRIT_SECTION();
+ 
+ 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
+ 		newoff = PageAddItem(newpage, (Item) newtup, newsz, InvalidOffsetNumber, false, false);
+ 		if (newoff == InvalidOffsetNumber)
+ 			elog(ERROR, "failed to add mmtuple to new page");
+ 		MarkBufferDirty(oldbuf);
+ 		MarkBufferDirty(newbuf);
+ 
+ 		ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
+ 		mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
+ 		MarkBufferDirty(revmapbuf);
+ 
+ 		/* XLOG stuff */
+ 		if (RelationNeedsWAL(idxrel))
+ 		{
+ 			xl_minmax_update	xlrec;
+ 			XLogRecPtr	recptr;
+ 			XLogRecData	rdata[4];
+ 			uint8		info = XLOG_MINMAX_UPDATE;
+ 
+ 			xlrec.new.node = idxrel->rd_node;
+ 			ItemPointerSet(&xlrec.new.tid, BufferGetBlockNumber(newbuf), newoff);
+ 			xlrec.new.heapBlk = heapBlk;
+ 			xlrec.new.revmapBlk = BufferGetBlockNumber(revmapbuf);
+ 			xlrec.new.pagesPerRange = pagesPerRange;
+ 			ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+ 
+ 			rdata[0].data = (char *) &xlrec;
+ 			rdata[0].len = SizeOfMinmaxUpdate;
+ 			rdata[0].buffer = InvalidBuffer;
+ 			rdata[0].next = &(rdata[1]);
+ 
+ 			rdata[1].data = (char *) newtup;
+ 			rdata[1].len = newsz;
+ 			rdata[1].buffer = newbuf;
+ 			rdata[1].buffer_std = true;
+ 			rdata[1].next = &(rdata[2]);
+ 
+ 			rdata[2].data = (char *) NULL;
+ 			rdata[2].len = 0;
+ 			rdata[2].buffer = revmapbuf;
+ 			rdata[2].buffer_std = true;
+ 			rdata[2].next = &(rdata[3]);
+ 
+ 			rdata[3].data = (char *) NULL;
+ 			rdata[3].len = 0;
+ 			rdata[3].buffer = oldbuf;
+ 			rdata[3].buffer_std = true;
+ 			rdata[3].next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 			PageSetLSN(oldpage, recptr);
+ 			PageSetLSN(newpage, recptr);
+ 			PageSetLSN(BufferGetPage(revmapbuf), recptr);
+ 		}
+ 
+ 		END_CRIT_SECTION();
+ 
+ 		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		UnlockReleaseBuffer(newbuf);
+ 		return true;
+ 	}
+ }
+ 
+ /*
+  * Insert an index tuple into the index relation.  The revmap is updated to
+  * mark the range containing the given page as pointing to the inserted entry.
+  * A WAL record is written.
+  *
+  * The buffer, if valid, is first checked for free space to insert the new
+  * entry; if there isn't enough, a new buffer is obtained and pinned.  On
+  * return, *buffer is unlocked but still pinned, so the caller can reuse it
+  * for a subsequent insertion.
+  *
+  * If the relation had to be extended to make room for the new index tuple,
+  * *extended is set to true.
+  */
+ void
+ mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk,
+ 			MMTuple *tup, Size itemsz, bool *extended)
+ {
+ 	Page		page;
+ 	BlockNumber blk;
+ 	OffsetNumber off;
+ 	Buffer		revmapbuf;
+ 	ItemPointerData tid;
+ 
+ 	/* account for alignment padding in all the free space checks below */
+ 	itemsz = MAXALIGN(itemsz);
+ 
+ 	/*
+ 	 * Lock the revmap page for the update. Note that this may require
+ 	 * extending the revmap, which in turn may require moving the currently
+ 	 * pinned index block out of the way.
+ 	 */
+ 	revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
+ 
+ 	/*
+ 	 * Obtain a locked buffer to insert the new tuple.  Note mm_getinsertbuffer
+ 	 * ensures there's enough space in the returned buffer.
+ 	 */
+ 	if (BufferIsValid(*buffer))
+ 	{
+ 		/*
+ 		 * It's possible that another backend (or ourselves!) extended the
+ 		 * revmap over the page we held a pin on, so we cannot assume that
+ 		 * it's still a regular page.  (mm_page_get_freespace returns 0 for
+ 		 * non-regular pages, so the check below covers that case too.)
+ 		 */
+ 		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ 		if (mm_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
+ 		{
+ 			UnlockReleaseBuffer(*buffer);
+ 			*buffer = InvalidBuffer;
+ 		}
+ 	}
+ 
+ 	if (!BufferIsValid(*buffer))
+ 	{
+ 		*buffer = mm_getinsertbuffer(idxrel, InvalidBuffer, itemsz, extended);
+ 		Assert(BufferIsValid(*buffer));
+ 		Assert(mm_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
+ 	}
+ 
+ 	page = BufferGetPage(*buffer);
+ 	blk = BufferGetBlockNumber(*buffer);
+ 
+ 	/* insert the tuple, point the revmap at it, and WAL-log both changes */
+ 	START_CRIT_SECTION();
+ 	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
+ 					  false, false);
+ 	if (off == InvalidOffsetNumber)
+ 		elog(ERROR, "could not insert new index tuple to page");
+ 	MarkBufferDirty(*buffer);
+ 
+ 	MINMAX_elog(DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
+ 				blk, off, heapBlk);
+ 
+ 	ItemPointerSet(&tid, blk, off);
+ 	mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
+ 	MarkBufferDirty(revmapbuf);
+ 
+ 	/* XLOG stuff */
+ 	if (RelationNeedsWAL(idxrel))
+ 	{
+ 		xl_minmax_insert	xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata[2];
+ 		uint8		info = XLOG_MINMAX_INSERT;
+ 
+ 		xlrec.node = idxrel->rd_node;
+ 		xlrec.heapBlk = heapBlk;
+ 		xlrec.pagesPerRange = pagesPerRange;
+ 		xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
+ 		ItemPointerSet(&xlrec.tid, blk, off);
+ 
+ 		rdata[0].data = (char *) &xlrec;
+ 		rdata[0].len = SizeOfMinmaxInsert;
+ 		rdata[0].buffer = InvalidBuffer;
+ 		rdata[0].buffer_std = false;
+ 		rdata[0].next = &(rdata[1]);
+ 
+ 		rdata[1].data = (char *) tup;
+ 		rdata[1].len = itemsz;
+ 		rdata[1].buffer = *buffer;
+ 		rdata[1].buffer_std = true;
+ 		rdata[1].next = NULL;
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 		PageSetLSN(page, recptr);
+ 		PageSetLSN(BufferGetPage(revmapbuf), recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	/* Tuple is firmly on buffer; we can release our locks */
+ 	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ 	LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+ }
+ 
+ /*
+  * Begin the page evacuation protocol.
+  *
+  * The page must be locked in exclusive mode by the caller.
+  *
+  * Returns false, changing nothing, if the page is new or holds no used
+  * line pointers: such a page can be reused for the revmap with no further
+  * work.  Otherwise the page is flagged for evacuation and true is returned.
+  */
+ bool
+ mm_start_evacuating_page(Relation idxRel, Buffer buf)
+ {
+ 	Page		page = BufferGetPage(buf);
+ 	MinmaxSpecialSpace *special;
+ 	OffsetNumber offnum;
+ 	OffsetNumber maxoff;
+ 
+ 	if (PageIsNew(page))
+ 		return false;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	maxoff = PageGetMaxOffsetNumber(page);
+ 	for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
+ 	{
+ 		ItemId		lp = PageGetItemId(page, offnum);
+ 
+ 		if (!ItemIdIsUsed(lp))
+ 			continue;
+ 
+ 		/*
+ 		 * Found a live tuple: flag the page to keep other backends from
+ 		 * adding more stuff to it, and tell caller to move the tuples out.
+ 		 */
+ 		special->flags |= MINMAX_EVACUATE_PAGE;
+ 		MarkBufferDirtyHint(buf, true);
+ 
+ 		return true;
+ 	}
+ 	return false;
+ }
+ 
+ /*
+  * Move all tuples out of a page.
+  *
+  * The caller must hold lock on the page. The lock and pin are released on
+  * return.  The page must already carry the MINMAX_EVACUATE_PAGE flag
+  * (asserted below), which makes mm_page_get_freespace report no free space,
+  * so no new tuples can appear on the page while we work.
+  */
+ void
+ mm_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, mmRevmapAccess *rmAccess, Buffer buf)
+ {
+ 	OffsetNumber off;
+ 	OffsetNumber maxoff;
+ 	MinmaxSpecialSpace *special;
+ 	Page		page;
+ 	bool		extended = false;
+ 
+ 	page = BufferGetPage(buf);
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	Assert(special->flags & MINMAX_EVACUATE_PAGE);
+ 
+ 	/*
+ 	 * NOTE(review): maxoff is not refreshed after the relock below;
+ 	 * presumably the evacuate flag guarantees no new offsets appear --
+ 	 * confirm.
+ 	 */
+ 	maxoff = PageGetMaxOffsetNumber(page);
+ 	for (off = FirstOffsetNumber; off <= maxoff; off++)
+ 	{
+ 		MMTuple	   *tup;
+ 		Size		sz;
+ 		ItemId		lp;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		lp = PageGetItemId(page, off);
+ 		if (ItemIdIsUsed(lp))
+ 		{
+ 			/* copy the tuple, so we can release our lock while moving it */
+ 			sz = ItemIdGetLength(lp);
+ 			tup = (MMTuple *) PageGetItem(page, lp);
+ 			tup = minmax_copy_tuple(tup, sz);
+ 
+ 			/* mm_doupdate acquires its own locks on the buffers involved */
+ 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 			if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, tup->mt_blkno, buf,
+ 							 off, tup, sz, tup, sz, false, &extended))
+ 				off--; /* retry the same offset */
+ 
+ 			LockBuffer(buf, BUFFER_LOCK_SHARE);
+ 
+ 			/* It's possible that someone extended the revmap over this page */
+ 			if (!MINMAX_IS_REGULAR_PAGE(page))
+ 				break;
+ 		}
+ 	}
+ 
+ 	UnlockReleaseBuffer(buf);
+ 
+ 	/* if any tuple move extended the index, let the FSM know */
+ 	if (extended)
+ 		FreeSpaceMapVacuum(idxRel);
+ }
+ 
+ /*
+  * Return a pinned and locked buffer which can be used to insert an index item
+  * of size itemsz.  If oldbuf is a valid buffer, it is also locked (in an
+  * order determined to avoid deadlocks.)
+  *
+  * If there's no existing page with enough free space to accommodate the new
+  * item, the relation is extended.  If this happens, *extended is set to true.
+  *
+  * If we find that the old page is no longer a regular index page (because
+  * of a revmap extension), the old buffer is unlocked and we return
+  * InvalidBuffer.
+  */
+ static Buffer
+ mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+ 				   bool *was_extended)
+ {
+ 	BlockNumber oldblk;
+ 	BlockNumber newblk;
+ 	Page		page;
+ 	int			freespace;
+ 	bool		extended = false;
+ 
+ 	if (BufferIsValid(oldbuf))
+ 		oldblk = BufferGetBlockNumber(oldbuf);
+ 	else
+ 		oldblk = InvalidBlockNumber;
+ 
+ 	/*
+ 	 * Loop until we find a page with sufficient free space.  By the time we
+ 	 * return to caller out of this loop, both buffers are valid and locked;
+ 	 * if we have to restart here, neither buffer is locked and buf is not
+ 	 * a pinned buffer.
+ 	 */
+ 	newblk = RelationGetTargetBlock(irel);
+ 	if (newblk == InvalidBlockNumber)
+ 		newblk = GetPageWithFreeSpace(irel, itemsz);
+ 	for (;;)
+ 	{
+ 		Buffer		buf;
+ 		bool		extensionLockHeld = false;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		if (newblk == InvalidBlockNumber)
+ 		{
+ 			/*
+ 			 * There's not enough free space in any existing index page,
+ 			 * according to the FSM: extend the relation to obtain a shiny
+ 			 * new page.
+ 			 */
+ 			if (!RELATION_IS_LOCAL(irel))
+ 			{
+ 				LockRelationForExtension(irel, ExclusiveLock);
+ 				extensionLockHeld = true;
+ 			}
+ 			buf = ReadBuffer(irel, P_NEW);
+ 			extended = true;
+ 
+ 			MINMAX_elog(DEBUG2, "mm_getinsertbuffer: extending to page %u",
+ 						BufferGetBlockNumber(buf));
+ 		}
+ 		else if (newblk == oldblk)
+ 		{
+ 			/*
+ 			 * There's an odd corner-case here where the FSM is out-of-date,
+ 			 * and gave us the old page.
+ 			 */
+ 			buf = oldbuf;
+ 		}
+ 		else
+ 		{
+ 			buf = ReadBuffer(irel, newblk);
+ 		}
+ 
+ 		/*
+ 		 * We lock the old buffer first, if it's earlier than the new one.
+ 		 * We also need to check that it hasn't been turned into a revmap
+ 		 * page concurrently; if we detect that it happened, give up and
+ 		 * tell caller to start over.
+ 		 */
+ 		if (BufferIsValid(oldbuf) && oldblk < newblk)
+ 		{
+ 			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 			if (!MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
+ 			{
+ 				LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 				ReleaseBuffer(buf);
+ 				return InvalidBuffer;
+ 			}
+ 		}
+ 
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 		if (extensionLockHeld)
+ 			UnlockRelationForExtension(irel, ExclusiveLock);
+ 
+ 		page = BufferGetPage(buf);
+ 
+ 		/* a freshly-extended page is raw; initialize it as a regular page */
+ 		if (extended)
+ 			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
+ 
+ 		/*
+ 		 * We have a new buffer from FSM now.  Check that the new page has
+ 		 * enough free space, and return it if it does; otherwise start over.
+ 		 * Note that we allow for the FSM to be out of date here, and in that
+ 		 * case we update it and move on.
+ 		 *
+ 		 * (mm_page_get_freespace also checks that the FSM didn't hand us a
+ 		 * page that has since been repurposed for the revmap.)
+ 		 */
+ 		freespace = mm_page_get_freespace(page);
+ 		if (freespace >= itemsz)
+ 		{
+ 			if (extended)
+ 				*was_extended = true;
+ 
+ 			RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
+ 
+ 			/*
+ 			 * Lock the old buffer if not locked already.  Note that in this
+ 			 * case we know for sure it's a regular page: it's later than the
+ 			 * new page we just got, which is not a revmap page, and revmap
+ 			 * pages are always consecutive.
+ 			 */
+ 			if (BufferIsValid(oldbuf) && oldblk > newblk)
+ 			{
+ 				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 				Assert(MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
+ 			}
+ 
+ 			return buf;
+ 		}
+ 
+ 		/* This page is no good. */
+ 
+ 		/*
+ 		 * If an entirely new page does not contain enough free space for
+ 		 * the new item, then surely that item is oversized.  Complain
+ 		 * loudly; but first make sure we record the page as free, for
+ 		 * next time.
+ 		 */
+ 		if (extended)
+ 		{
+ 			RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
+ 									freespace);
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ 					 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ 							(unsigned long) itemsz,
+ 							(unsigned long) freespace,
+ 							RelationGetRelationName(irel))));
+ 			return InvalidBuffer;	/* keep compiler quiet */
+ 		}
+ 
+ 		/*
+ 		 * NOTE(review): when newblk == oldblk, buf is oldbuf and neither
+ 		 * branch below unlocks it, so we loop with oldbuf still exclusively
+ 		 * locked -- confirm a relock of the same buffer cannot be attempted
+ 		 * on the next iteration.
+ 		 */
+ 		if (newblk != oldblk)
+ 			UnlockReleaseBuffer(buf);
+ 		if (BufferIsValid(oldbuf) && oldblk < newblk)
+ 			LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 
+ 		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
+ 	}
+ }
+ 
+ /*
+  * Amount of free space on a regular minmax index page.
+  *
+  * Pages that are not regular, or that carry the MINMAX_EVACUATE_PAGE flag,
+  * report no free space at all, which keeps new tuples out of them.
+  */
+ static Size
+ mm_page_get_freespace(Page page)
+ {
+ 	MinmaxSpecialSpace *special;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	if (MINMAX_IS_REGULAR_PAGE(page) &&
+ 		(special->flags & MINMAX_EVACUATE_PAGE) == 0)
+ 		return PageGetFreeSpace(page);
+ 
+ 	return 0;
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmrevmap.c
***************
*** 0 ****
--- 1,451 ----
+ /*
+  * mmrevmap.c
+  *		Reverse range map for MinMax indexes
+  *
+  * The reverse range map (revmap) is a translation structure for minmax
+  * indexes: for each page range there is one summary tuple, and its location is
+  * tracked by the revmap.  Whenever a new tuple is inserted into a table that
+  * violates the previously recorded summary values, a new tuple is inserted
+  * into the index and the revmap is updated to point to it.
+  *
+  * The revmap is stored in the first pages of the index, immediately following
+  * the metapage.  When the revmap needs to be expanded, all tuples on the
+  * regular minmax page at that block (if any) are moved out of the way.
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmrevmap.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/xlog.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_pageops.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
+ #include "access/minmax_xlog.h"
+ #include "access/rmgr.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/lmgr.h"
+ #include "utils/rel.h"
+ 
+ 
+ /*
+  * In revmap pages, each item stores an ItemPointerData.  These defines let one
+  * find the logical revmap page number and index number of the revmap item for
+  * the given heap block number.
+  */
+ #define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
+ 	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
+ #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
+ 	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
+ 
+ 
+ struct mmRevmapAccess
+ {
+ 	Relation	idxrel;
+ 	BlockNumber pagesPerRange;
+ 	BlockNumber lastRevmapPage;		/* cached from the metapage */
+ 	Buffer		metaBuf;
+ 	Buffer		currBuf;
+ };
+ /* typedef appears in minmax_revmap.h */
+ 
+ 
+ static BlockNumber rm_get_phys_blkno(mmRevmapAccess *rmAccess,
+ 				  BlockNumber mapBlk, bool extend);
+ static void rm_extend(mmRevmapAccess *rmAccess);
+ 
+ /*
+  * Create an access object for the reverse range map of the given index;
+  * it can then be used to read stuff from the revmap.  The object must be
+  * freed with mmRevmapAccessTerminate when the caller is done with it.
+  *
+  * On exit, *pagesPerRange is set to the range size recorded in the metapage.
+  */
+ mmRevmapAccess *
+ mmRevmapAccessInit(Relation idxrel, BlockNumber *pagesPerRange)
+ {
+ 	MinmaxMetaPageData *metadata;
+ 	mmRevmapAccess *rmAccess;
+ 	Buffer		metabuf;
+ 
+ 	/* consult the metapage for the revmap extent and range size */
+ 	metabuf = ReadBuffer(idxrel, MINMAX_METAPAGE_BLKNO);
+ 	LockBuffer(metabuf, BUFFER_LOCK_SHARE);
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(BufferGetPage(metabuf));
+ 
+ 	rmAccess = palloc(sizeof(mmRevmapAccess));
+ 	rmAccess->idxrel = idxrel;
+ 	rmAccess->pagesPerRange = metadata->pagesPerRange;
+ 	rmAccess->lastRevmapPage = metadata->lastRevmapPage;
+ 	rmAccess->metaBuf = metabuf;
+ 	rmAccess->currBuf = InvalidBuffer;
+ 
+ 	*pagesPerRange = metadata->pagesPerRange;
+ 
+ 	/* the metapage stays pinned, but not locked, in the access object */
+ 	LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
+ 
+ 	return rmAccess;
+ }
+ 
+ /*
+  * Free a revmap access object, dropping whatever buffer pins it holds.
+  */
+ void
+ mmRevmapAccessTerminate(mmRevmapAccess *rmAccess)
+ {
+ 	/* drop the current revmap page's pin, if any, plus the metapage's */
+ 	if (rmAccess->currBuf != InvalidBuffer)
+ 		ReleaseBuffer(rmAccess->currBuf);
+ 	ReleaseBuffer(rmAccess->metaBuf);
+ 	pfree(rmAccess);
+ }
+ 
+ /*
+  * Prepare for updating an entry in the revmap.
+  *
+  * The map is extended, if necessary.
+  *
+  * The returned buffer (also kept as rmAccess->currBuf) is locked in
+  * exclusive mode; the caller must unlock it, but should not release the
+  * pin, which remains with the access object.
+  */
+ Buffer
+ mmLockRevmapPageForUpdate(mmRevmapAccess *rmAccess, BlockNumber heapBlk)
+ {
+ 	BlockNumber mapBlk;
+ 
+ 	/*
+ 	 * Translate the map block number to physical location.  Note this extends
+ 	 * the revmap, if necessary.
+ 	 */
+ 	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
+ 	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, true);
+ 	Assert(mapBlk != InvalidBlockNumber);
+ 
+ 	/* %u, not %lu: HEAPBLK_TO_REVMAP_BLK yields a BlockNumber (uint32) */
+ 	MINMAX_elog(DEBUG2, "locking revmap page for logical page %u (physical %u) for heap %u",
+ 				HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk),
+ 				mapBlk, heapBlk);
+ 
+ 	/*
+ 	 * Obtain the buffer from which we need to read.  If we already have the
+ 	 * correct buffer in our access struct, use that; otherwise, release that,
+ 	 * (if valid) and read the one we need.
+ 	 */
+ 	if (rmAccess->currBuf == InvalidBuffer ||
+ 		mapBlk != BufferGetBlockNumber(rmAccess->currBuf))
+ 	{
+ 		if (rmAccess->currBuf != InvalidBuffer)
+ 			ReleaseBuffer(rmAccess->currBuf);
+ 
+ 		rmAccess->currBuf = ReadBuffer(rmAccess->idxrel, mapBlk);
+ 	}
+ 
+ 	LockBuffer(rmAccess->currBuf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 	return rmAccess->currBuf;
+ }
+ 
+ /*
+  * Set one element of a revmap page.
+  *
+  * buf must be a suitably-locked revmap page of a minmax index whose range
+  * size is pagesPerRange; the element for the range containing heapBlk is
+  * set to tid.
+  *
+  * Used during regular operation as well as during WAL replay; once the
+  * operation is complete, the caller must update the LSN on the buffer.
+  */
+ void
+ mmSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange, BlockNumber heapBlk,
+ 					  ItemPointerData tid)
+ {
+ 	RevmapContents *contents;
+ 	ItemPointerData *iptr;
+ 
+ 	/* the correct page should already be pinned and locked */
+ 	contents = (RevmapContents *) PageGetContents(BufferGetPage(buf));
+ 	iptr = ((ItemPointerData *) contents->rmr_tids) +
+ 		HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
+ 
+ 	ItemPointerSet(iptr,
+ 				   ItemPointerGetBlockNumber(&tid),
+ 				   ItemPointerGetOffsetNumber(&tid));
+ }
+ 
+ /*
+  * Fetch the MMTuple for a given heap block.
+  *
+  * The buffer containing the tuple is locked (in the given lock "mode"), and
+  * returned in *buf. As an optimization, the caller can pass a pinned buffer
+  * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is
+  * on the same page as previous one.
+  *
+  * If no tuple is found for the given heap range, returns NULL. In that case,
+  * *buf might still be updated, but it's not locked.
+  *
+  * The output tuple offset within the buffer is returned in *off.
+  */
+ MMTuple *
+ mmGetMMTupleForHeapBlock(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 						 Buffer *buf, OffsetNumber *off, int mode)
+ {
+ 	Relation	idxRel = rmAccess->idxrel;
+ 	BlockNumber mapBlk;
+ 	RevmapContents *contents;
+ 	ItemPointerData *iptr;
+ 	BlockNumber	blk;
+ 	Page		page;
+ 	ItemId		lp;
+ 	MMTuple	   *mmtup;
+ 	ItemPointerData previptr;
+ 
+ 	/* normalize the heap block number to be the first page in the range */
+ 	heapBlk = (heapBlk / rmAccess->pagesPerRange) * rmAccess->pagesPerRange;
+ 
+ 	/* Compute the revmap page number we need */
+ 	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
+ 	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, false);
+ 	if (mapBlk == InvalidBlockNumber)
+ 	{
+ 		/* the revmap doesn't cover this range yet, so no tuple can exist */
+ 		*off = InvalidOffsetNumber;
+ 		return NULL;
+ 	}
+ 
+ 	/* no TID seen yet; used for the sanity check inside the loop */
+ 	ItemPointerSetInvalid(&previptr);
+ 	for (;;)
+ 	{
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		/* make sure we have the correct revmap page pinned */
+ 		if (rmAccess->currBuf == InvalidBuffer ||
+ 			BufferGetBlockNumber(rmAccess->currBuf) != mapBlk)
+ 		{
+ 			if (rmAccess->currBuf != InvalidBuffer)
+ 				ReleaseBuffer(rmAccess->currBuf);
+ 
+ 			Assert(mapBlk != InvalidBlockNumber);
+ 			rmAccess->currBuf = ReadBuffer(rmAccess->idxrel, mapBlk);
+ 		}
+ 
+ 		LockBuffer(rmAccess->currBuf, BUFFER_LOCK_SHARE);
+ 
+ 		contents = (RevmapContents *)
+ 			PageGetContents(BufferGetPage(rmAccess->currBuf));
+ 		iptr = contents->rmr_tids;
+ 		iptr += HEAPBLK_TO_REVMAP_INDEX(rmAccess->pagesPerRange, heapBlk);
+ 
+ 		/* an invalid TID means no tuple has been inserted for this range */
+ 		if (!ItemPointerIsValid(iptr))
+ 		{
+ 			LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK);
+ 			return NULL;
+ 		}
+ 
+ 		/*
+ 		 * Save the current TID we got from the revmap; if we loop we can
+ 		 * sanity-check that the new one is different.  Otherwise we might
+ 		 * be stuck looping forever if the revmap is somehow badly broken.
+ 		 */
+ 		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
+ 			ereport(ERROR,
+ 					/* FIXME improve message */
+ 					(errmsg("revmap was updated but still contains same TID as before")));
+ 		previptr = *iptr;
+ 
+ 		blk = ItemPointerGetBlockNumber(iptr);
+ 		*off = ItemPointerGetOffsetNumber(iptr);
+ 
+ 		LockBuffer(rmAccess->currBuf, BUFFER_LOCK_UNLOCK);
+ 
+ 		/* Ok, got a pointer to where the MMTuple should be. Fetch it. */
+ 		if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
+ 		{
+ 			if (BufferIsValid(*buf))
+ 				ReleaseBuffer(*buf);
+ 			*buf = ReadBuffer(idxRel, blk);
+ 		}
+ 		LockBuffer(*buf, mode);
+ 		page = BufferGetPage(*buf);
+ 
+ 		/*
+ 		 * The TID should point into a regular (data) page.  If the page was
+ 		 * meanwhile converted to a revmap page (see rm_extend), or the tuple
+ 		 * was moved or removed, we fall through and retry from the revmap.
+ 		 */
+ 		if (MINMAX_IS_REGULAR_PAGE(page))
+ 		{
+ 			lp = PageGetItemId(page, *off);
+ 			if (ItemIdIsUsed(lp))
+ 			{
+ 				mmtup = (MMTuple *) PageGetItem(page, lp);
+ 
+ 				if (mmtup->mt_blkno == heapBlk)
+ 				{
+ 					/* found it! */
+ 					return mmtup;
+ 				}
+ 			}
+ 		}
+ 
+ 		/*
+ 		 * No luck. Assume that the revmap was updated concurrently.
+ 		 */
+ 		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+ 	}
+ 	/* not reached, but keep compiler quiet */
+ 	return NULL;
+ }
+ 
+ /*
+  * Convert a logical revmap block number into a physical block number.
+  *
+  * If "extend" is true and the revmap does not yet cover the requested
+  * logical page, grow it until it does.  Otherwise, return
+  * InvalidBlockNumber for pages beyond the current end of the revmap.
+  */
+ static BlockNumber
+ rm_get_phys_blkno(mmRevmapAccess *rmAccess, BlockNumber mapBlk, bool extend)
+ {
+ 	/* physical block 0 is the metapage, so revmap pages start at block 1 */
+ 	BlockNumber physblk = mapBlk + 1;
+ 
+ 	if (physblk > rmAccess->lastRevmapPage)
+ 	{
+ 		if (!extend)
+ 			return InvalidBlockNumber;
+ 
+ 		/*
+ 		 * Grow the revmap one page at a time until it covers the target;
+ 		 * note rm_extend may occasionally return without extending.
+ 		 */
+ 		do
+ 		{
+ 			rm_extend(rmAccess);
+ 		} while (physblk > rmAccess->lastRevmapPage);
+ 	}
+ 
+ 	return physblk;
+ }
+ 
+ /*
+  * Extend the revmap by one page.
+  *
+  * However, if the revmap was extended by someone else concurrently, we might
+  * return without actually doing anything.
+  *
+  * If there is an existing minmax page at that block, it is atomically moved
+  * out of the way, and the redirect pointer on the new revmap page is set
+  * to point to its new location.
+  *
+  * NOTE(review): this function can also return without having extended the
+  * revmap (stale cached lastRevmapPage, concurrent relation extension, or
+  * page evacuation); callers such as rm_get_phys_blkno must loop until the
+  * revmap is long enough.
+  */
+ static void
+ rm_extend(mmRevmapAccess *rmAccess)
+ {
+ 	Buffer		buf;
+ 	Page		page;
+ 	Page		metapage;
+ 	MinmaxMetaPageData *metadata;
+ 	BlockNumber	mapBlk;
+ 	BlockNumber nblocks;
+ 	Relation	irel = rmAccess->idxrel;
+ 	bool		needLock = !RELATION_IS_LOCAL(irel);
+ 
+ 	/*
+ 	 * Lock the metapage. This locks out concurrent extensions of the revmap,
+ 	 * but note that we still need to grab the relation extension lock because
+ 	 * another backend can extend the index with regular minmax pages.
+ 	 */
+ 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_EXCLUSIVE);
+ 	metapage = BufferGetPage(rmAccess->metaBuf);
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(metapage);
+ 
+ 	/*
+ 	 * Check that our cached lastRevmapPage value was up-to-date; if it wasn't,
+ 	 * update the cached copy and have caller start over.
+ 	 */
+ 	if (metadata->lastRevmapPage != rmAccess->lastRevmapPage)
+ 	{
+ 		rmAccess->lastRevmapPage = metadata->lastRevmapPage;
+ 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 		return;
+ 	}
+ 	mapBlk = metadata->lastRevmapPage + 1;
+ 
+ 	/* obtain the target block: reuse an existing one, or extend the index */
+ 	nblocks = RelationGetNumberOfBlocks(irel);
+ 	if (mapBlk < nblocks)
+ 	{
+ 		buf = ReadBuffer(irel, mapBlk);
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 		page = BufferGetPage(buf);
+ 	}
+ 	else
+ 	{
+ 		if (needLock)
+ 			LockRelationForExtension(irel, ExclusiveLock);
+ 
+ 		buf = ReadBuffer(irel, P_NEW);
+ 		if (BufferGetBlockNumber(buf) != mapBlk)
+ 		{
+ 			/*
+ 			 * Very rare corner case: somebody extended the relation
+ 			 * concurrently after we read its length.  If this happens, give up
+ 			 * and have caller start over.  We will have to evacuate that page
+ 			 * from under whoever is using it.
+ 			 */
+ 			if (needLock)
+ 				UnlockRelationForExtension(irel, ExclusiveLock);
+ 			LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 			return;
+ 		}
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 		page = BufferGetPage(buf);
+ 
+ 		if (needLock)
+ 			UnlockRelationForExtension(irel, ExclusiveLock);
+ 	}
+ 
+ 	/* Check that it's a regular block (or an empty page) */
+ 	if (!PageIsNew(page) && !MINMAX_IS_REGULAR_PAGE(page))
+ 		elog(ERROR, "unexpected minmax page type: 0x%04X",
+ 			 MINMAX_PAGE_TYPE(page));
+ 
+ 	/* If the page is in use, evacuate it and restart */
+ 	if (mm_start_evacuating_page(irel, buf))
+ 	{
+ 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 		mm_evacuate_page(irel, rmAccess->pagesPerRange, rmAccess, buf);
+ 
+ 		/* have caller start over */
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Ok, we have now locked the metapage and the target block. Re-initialize
+ 	 * it as a revmap page.
+ 	 */
+ 	START_CRIT_SECTION();
+ 
+ 	/* the rmr_tids array is initialized to all invalid by PageInit */
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 	MarkBufferDirty(buf);
+ 
+ 	metadata->lastRevmapPage = mapBlk;
+ 	MarkBufferDirty(rmAccess->metaBuf);
+ 
+ 	if (RelationNeedsWAL(rmAccess->idxrel))
+ 	{
+ 		xl_minmax_revmap_extend xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata;
+ 
+ 		xlrec.node = rmAccess->idxrel->rd_node;
+ 		xlrec.targetBlk = mapBlk;
+ 
+ 		rdata.data = (char *) &xlrec;
+ 		rdata.len = SizeOfMinmaxRevmapExtend;
+ 		rdata.buffer = InvalidBuffer;
+ 		rdata.buffer_std = false;
+ 		rdata.next = NULL;
+ 
+ 		/* FIXME don't we need to log the metapage buffer also? */
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_REVMAP_EXTEND, &rdata);
+ 		PageSetLSN(metapage, recptr);
+ 		PageSetLSN(page, recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 
+ 	UnlockReleaseBuffer(buf);
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmsortable.c
***************
*** 0 ****
--- 1,287 ----
+ /*
+  * mmsortable.c
+  *		Implementation of Minmax indexes for sortable datatypes
+  *		(that is, anything with a btree opclass)
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmsortable.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/genam.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_tuple.h"
+ #include "access/skey.h"
+ #include "catalog/pg_type.h"
+ #include "utils/datum.h"
+ #include "utils/lsyscache.h"
+ #include "utils/syscache.h"
+ 
+ 
+ /*
+  * Procedure numbers must not collide with MINMAX_PROCNUM defines in
+  * minmax_internal.h.  Note we only need inequality functions.
+  */
+ #define		SORTABLE_NUM_PROCNUMS	4	/* # support procs we need */
+ #define		PROCNUM_LESS			4
+ #define		PROCNUM_LESSEQUAL		5
+ #define		PROCNUM_GREATEREQUAL	6
+ #define		PROCNUM_GREATER			7
+ 
+ /* subtract this from procnum to obtain index in SortableOpaque arrays */
+ #define		PROCNUM_BASE			4
+ 
+ static FmgrInfo *mmsrt_get_procinfo(MinmaxDesc *mmdesc, uint16 attno,
+ 				   uint16 procnum);
+ 
+ PG_FUNCTION_INFO_V1(mmSortableAddValue);
+ PG_FUNCTION_INFO_V1(mmSortableConsistent);
+ 
+ 
+ /*
+  * Opclass-private state, stored in MinmaxOpcInfo->oi_opaque by the OPCINFO
+  * functions below.  Caches the FmgrInfo for the support procedures above;
+  * entries are looked up lazily by mmsrt_get_procinfo.
+  */
+ typedef struct SortableOpaque
+ {
+ 	FmgrInfo	operators[SORTABLE_NUM_PROCNUMS];	/* indexed by procnum - PROCNUM_BASE */
+ 	bool		inited[SORTABLE_NUM_PROCNUMS];		/* is operators[i] valid? */
+ } SortableOpaque;
+ 
+ /*
+  * OPCINFO generates the "opcinfo" opclass support function for one datatype.
+  * Each generated function reports that two values of the given type are
+  * stored per indexed column (the minimum and the maximum), and allocates
+  * the SortableOpaque cache for the support procedure lookups.
+  */
+ #define	OPCINFO(typname, typoid)											\
+ PG_FUNCTION_INFO_V1(mmSortableOpcInfo_##typname);							\
+ Datum																		\
+ mmSortableOpcInfo_##typname(PG_FUNCTION_ARGS)								\
+ {																			\
+ 	SortableOpaque *opaque;													\
+ 	MinmaxOpcInfo *result;													\
+ 																			\
+ 	opaque = palloc0(sizeof(SortableOpaque));								\
+ 	/*																		\
+ 	 * 'operators' is initialized lazily, as indicated by 'inited' which was \
+ 	 * initialized to all false by palloc0.									\
+ 	 */																		\
+ 																			\
+ 	result = palloc(SizeofMinmaxOpcInfo(2));	/* min, max */				\
+ 	result->oi_nstored = 2;													\
+ 	result->oi_opaque = opaque;												\
+ 	result->oi_typids[0] = typoid;											\
+ 	result->oi_typids[1] = typoid;											\
+ 																			\
+ 	PG_RETURN_POINTER(result);												\
+ }
+ 
+ /* one opcinfo function per supported sortable datatype */
+ OPCINFO(int4, INT4OID)
+ OPCINFO(numeric, NUMERICOID)
+ OPCINFO(text, TEXTOID)
+ OPCINFO(time, TIMEOID)
+ OPCINFO(timetz, TIMETZOID)
+ OPCINFO(timestamp, TIMESTAMPOID)
+ OPCINFO(timestamptz, TIMESTAMPTZOID)
+ OPCINFO(date, DATEOID)
+ OPCINFO(char, CHAROID)
+ 
+ /*
+  * Examine the given index tuple (which contains partial status of a certain
+  * page range) by comparing it to the given value that comes from another heap
+  * tuple.  If the new value is outside the domain specified by the existing
+  * tuple values, update the index range and return true.  Otherwise, return
+  * false and do not modify in this case.
+  *
+  * Arguments: (MinmaxDesc *, DeformedMMTuple *, attno, newval, isnull).
+  */
+ Datum
+ mmSortableAddValue(PG_FUNCTION_ARGS)
+ {
+ 	MinmaxDesc	   *mmdesc = (MinmaxDesc *) PG_GETARG_POINTER(0);
+ 	DeformedMMTuple *dtuple = (DeformedMMTuple *) PG_GETARG_POINTER(1);
+ 	AttrNumber		attno = PG_GETARG_UINT16(2);
+ 	Datum			newval = PG_GETARG_DATUM(3);
+ 	bool			isnull = PG_GETARG_BOOL(4);	/* was PG_GETARG_DATUM: wrong accessor */
+ 	Oid				colloid = PG_GET_COLLATION();
+ 	Form_pg_attribute attr;
+ 	FmgrInfo	   *cmpFn;
+ 	Datum			compar;
+ 	bool			updated = false;
+ 
+ 	/* pg_attribute entry, for the datumCopy() calls below */
+ 	attr = mmdesc->md_tupdesc->attrs[attno - 1];
+ 
+ 	/*
+ 	 * If the new value is null, we record that we saw it if it's the first
+ 	 * one; otherwise, there's nothing to do.
+ 	 */
+ 	if (isnull)
+ 	{
+ 		if (dtuple->dt_columns[attno - 1].hasnulls)
+ 			PG_RETURN_BOOL(false);
+ 
+ 		dtuple->dt_columns[attno - 1].hasnulls = true;
+ 		PG_RETURN_BOOL(true);
+ 	}
+ 
+ 	/*
+ 	 * If the recorded value is null, store the new value (which we know to be
+ 	 * not null) as both minimum and maximum, and we're done.
+ 	 */
+ 	if (dtuple->dt_columns[attno - 1].allnulls)
+ 	{
+ 		dtuple->dt_columns[attno - 1].values[0] =
+ 			datumCopy(newval, attr->attbyval, attr->attlen);
+ 		dtuple->dt_columns[attno - 1].values[1] =
+ 			datumCopy(newval, attr->attbyval, attr->attlen);
+ 		dtuple->dt_columns[attno - 1].allnulls = false;
+ 		PG_RETURN_BOOL(true);
+ 	}
+ 
+ 	/*
+ 	 * Otherwise, need to compare the new value with the existing boundaries
+ 	 * and update them accordingly.  First check if it's less than the existing
+ 	 * minimum.
+ 	 */
+ 	cmpFn = mmsrt_get_procinfo(mmdesc, attno, PROCNUM_LESS);
+ 	compar = FunctionCall2Coll(cmpFn, colloid, newval,
+ 							   dtuple->dt_columns[attno - 1].values[0]);
+ 	if (DatumGetBool(compar))
+ 	{
+ 		dtuple->dt_columns[attno - 1].values[0] =
+ 			datumCopy(newval, attr->attbyval, attr->attlen);
+ 		updated = true;
+ 	}
+ 
+ 	/*
+ 	 * And now compare it to the existing maximum.
+ 	 */
+ 	cmpFn = mmsrt_get_procinfo(mmdesc, attno, PROCNUM_GREATER);
+ 	compar = FunctionCall2Coll(cmpFn, colloid, newval,
+ 							   dtuple->dt_columns[attno - 1].values[1]);
+ 	if (DatumGetBool(compar))
+ 	{
+ 		dtuple->dt_columns[attno - 1].values[1] =
+ 			datumCopy(newval, attr->attbyval, attr->attlen);
+ 		updated = true;
+ 	}
+ 
+ 	PG_RETURN_BOOL(updated);
+ }
+ 
+ /*
+  * Given an index tuple corresponding to a certain page range and a scan key,
+  * return whether the scan key is consistent with the index tuple.  Return
+  * true if so, false otherwise.
+  */
+ Datum
+ mmSortableConsistent(PG_FUNCTION_ARGS)
+ {
+ 	MinmaxDesc *mmdesc = (MinmaxDesc *) PG_GETARG_POINTER(0);
+ 	DeformedMMTuple *dtup = (DeformedMMTuple *) PG_GETARG_POINTER(1);
+ 	ScanKey		key = (ScanKey) PG_GETARG_POINTER(2);
+ 	Oid			colloid = PG_GET_COLLATION();
+ 	AttrNumber	attno = key->sk_attno;
+ 	Datum		query;
+ 	Datum		result;
+ 	uint16		procnum;
+ 	int			bound;
+ 
+ 	/* IS NULL / IS NOT NULL tests are resolved from the null bits alone */
+ 	if (key->sk_flags & SK_ISNULL)
+ 	{
+ 		if (key->sk_flags & SK_SEARCHNULL)
+ 			PG_RETURN_BOOL(dtup->dt_columns[attno - 1].allnulls ||
+ 						   dtup->dt_columns[attno - 1].hasnulls);
+ 
+ 		/* IS NOT NULL can only rule out ranges whose values are all null */
+ 		Assert(key->sk_flags & SK_SEARCHNOTNULL);
+ 		PG_RETURN_BOOL(!dtup->dt_columns[attno - 1].allnulls);
+ 	}
+ 
+ 	query = key->sk_argument;
+ 
+ 	/*
+ 	 * Equality (WHERE col = someval): the range matches when its minimum is
+ 	 * <= the scan key AND its maximum is >= the scan key.
+ 	 */
+ 	if (key->sk_strategy == BTEqualStrategyNumber)
+ 	{
+ 		result = FunctionCall2Coll(mmsrt_get_procinfo(mmdesc, attno,
+ 													  PROCNUM_LESSEQUAL),
+ 								   colloid,
+ 								   dtup->dt_columns[attno - 1].values[0],
+ 								   query);
+ 		if (DatumGetBool(result))
+ 			result = FunctionCall2Coll(mmsrt_get_procinfo(mmdesc, attno,
+ 														  PROCNUM_GREATEREQUAL),
+ 									   colloid,
+ 									   dtup->dt_columns[attno - 1].values[1],
+ 									   query);
+ 		PG_RETURN_DATUM(result);
+ 	}
+ 
+ 	/*
+ 	 * Inequalities: apply the corresponding support procedure to one stored
+ 	 * bound; values[0] is the range minimum, values[1] the maximum.
+ 	 */
+ 	switch (key->sk_strategy)
+ 	{
+ 		case BTLessStrategyNumber:
+ 			procnum = PROCNUM_LESS;
+ 			bound = 0;
+ 			break;
+ 		case BTLessEqualStrategyNumber:
+ 			procnum = PROCNUM_LESSEQUAL;
+ 			bound = 0;
+ 			break;
+ 		case BTGreaterEqualStrategyNumber:
+ 			procnum = PROCNUM_GREATEREQUAL;
+ 			bound = 1;
+ 			break;
+ 		case BTGreaterStrategyNumber:
+ 			procnum = PROCNUM_GREATER;
+ 			bound = 1;
+ 			break;
+ 		default:
+ 			/* shouldn't happen */
+ 			elog(ERROR, "invalid strategy number %d", key->sk_strategy);
+ 			procnum = 0;		/* keep compiler quiet */
+ 			bound = 0;
+ 			break;
+ 	}
+ 
+ 	result = FunctionCall2Coll(mmsrt_get_procinfo(mmdesc, attno, procnum),
+ 							   colloid,
+ 							   dtup->dt_columns[attno - 1].values[bound],
+ 							   query);
+ 	PG_RETURN_DATUM(result);
+ }
+ 
+ /*
+  * Return the FmgrInfo for the given support procedure number, looking it up
+  * on first use and caching it in the opclass's SortableOpaque thereafter.
+  */
+ static FmgrInfo *
+ mmsrt_get_procinfo(MinmaxDesc *mmdesc, uint16 attno, uint16 procnum)
+ {
+ 	uint16		idx = procnum - PROCNUM_BASE;
+ 	SortableOpaque *opaque =
+ 		(SortableOpaque *) mmdesc->md_info[attno - 1]->oi_opaque;
+ 
+ 	/* first use of this procedure?  do the syscache lookup and cache it */
+ 	if (!opaque->inited[idx])
+ 	{
+ 		fmgr_info_copy(&opaque->operators[idx],
+ 					   index_getprocinfo(mmdesc->md_index, attno, procnum),
+ 					   CurrentMemoryContext);
+ 		opaque->inited[idx] = true;
+ 	}
+ 
+ 	return &opaque->operators[idx];
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmtuple.c
***************
*** 0 ****
--- 1,478 ----
+ /*
+  * MinMax-specific tuples
+  *		Method implementations for tuples in minmax indexes.
+  *
+  * Intended usage is that code outside this file only deals with
+  * DeformedMMTuples, and convert to and from the on-disk representation through
+  * functions in this file.
+  *
+  * NOTES
+  *
+  * A minmax tuple is similar to a heap tuple, with a few key differences.  The
+  * first interesting difference is that the tuple header is much simpler, only
+  * containing its total length and a small area for flags.	Also, the stored
+  * data does not match the relation tuple descriptor exactly: for each
+  * attribute in the descriptor, the index tuple carries an arbitrary number
+  * of values, depending on the opclass.
+  *
+  * Also, for each column of the index relation there are two null bits: one
+  * (hasnulls) stores whether any tuple within the page range has that column
+  * set to null; the other one (allnulls) stores whether the column values are
+  * all null.  If allnulls is true, then the tuple data area does not contain
+  * values for that column at all; whereas it does if the hasnulls is set.
+  * Note the size of the null bitmask may not be the same as that of the
+  * datum array.
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmtuple.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/htup_details.h"
+ #include "access/minmax_tuple.h"
+ #include "access/tupdesc.h"
+ #include "access/tupmacs.h"
+ 
+ 
+ static inline void mm_deconstruct_tuple(MinmaxDesc *mmdesc,
+ 					 char *tp, bits8 *nullbits, bool nulls,
+ 					 Datum *values, bool *allnulls, bool *hasnulls);
+ 
+ 
+ /*
+  * Return a tuple descriptor describing the on-disk storage of minmax tuples:
+  * one attribute per stored value of each indexed column, using the type OIDs
+  * declared by each column's opclass.  Built on first call and cached in the
+  * MinmaxDesc.
+  */
+ static TupleDesc
+ mmtuple_disk_tupdesc(MinmaxDesc *mmdesc)
+ {
+ 	TupleDesc	disktdesc = mmdesc->md_disktdesc;
+ 
+ 	if (disktdesc == NULL)
+ 	{
+ 		AttrNumber	attno = 1;
+ 		int			keyno;
+ 
+ 		disktdesc = CreateTemplateTupleDesc(mmdesc->md_totalstored, false);
+ 
+ 		for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 		{
+ 			int			datumno;
+ 
+ 			for (datumno = 0;
+ 				 datumno < mmdesc->md_info[keyno]->oi_nstored;
+ 				 datumno++)
+ 				TupleDescInitEntry(disktdesc, attno++, NULL,
+ 								   mmdesc->md_info[keyno]->oi_typids[datumno],
+ 								   -1, 0);
+ 		}
+ 
+ 		mmdesc->md_disktdesc = disktdesc;
+ 	}
+ 
+ 	return disktdesc;
+ }
+ 
+ /*
+  * Generate a new on-disk tuple to be inserted in a minmax index.
+  *
+  * "blkno" is the heap block number that the summarized range starts at;
+  * the tuple's total size in bytes is returned in *size.
+  */
+ MMTuple *
+ minmax_form_tuple(MinmaxDesc *mmdesc, BlockNumber blkno,
+ 				  DeformedMMTuple *tuple, Size *size)
+ {
+ 	Datum	   *values;
+ 	bool	   *nulls;
+ 	bool		anynulls = false;
+ 	MMTuple    *rettuple;
+ 	int			keyno;
+ 	int			idxattno;
+ 	uint16		phony_infomask;
+ 	bits8	   *phony_nullbitmap;
+ 	Size		len,
+ 				hoff,
+ 				data_len;
+ 
+ 	Assert(mmdesc->md_totalstored > 0);
+ 
+ 	values = palloc(sizeof(Datum) * mmdesc->md_totalstored);
+ 	nulls = palloc0(sizeof(bool) * mmdesc->md_totalstored);
+ 	phony_nullbitmap = palloc(sizeof(bits8) * BITMAPLEN(mmdesc->md_totalstored));
+ 
+ 	/*
+ 	 * Set up the values/nulls arrays for heap_fill_tuple
+ 	 */
+ 	idxattno = 0;
+ 	for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 	{
+ 		int		datumno;
+ 
+ 		/*
+ 		 * "allnulls" is set when there's no nonnull value in any row in
+ 		 * the column; when this happens, there is no data to store.  Thus
+ 		 * set the nullable bits for all data elements of this column and
+ 		 * we're done.
+ 		 */
+ 		if (tuple->dt_columns[keyno].allnulls)
+ 		{
+ 			for (datumno = 0;
+ 				 datumno < mmdesc->md_info[keyno]->oi_nstored;
+ 				 datumno++)
+ 				nulls[idxattno++] = true;
+ 			anynulls = true;
+ 			continue;
+ 		}
+ 
+ 		/*
+ 		 * The "hasnulls" bit is set when there are some null values in the
+ 		 * data.  We still need to store a real value, but the presence of this
+ 		 * means we need a null bitmap.
+ 		 */
+ 		if (tuple->dt_columns[keyno].hasnulls)
+ 			anynulls = true;
+ 
+ 		for (datumno = 0;
+ 			 datumno < mmdesc->md_info[keyno]->oi_nstored;
+ 			 datumno++)
+ 			values[idxattno++] = tuple->dt_columns[keyno].values[datumno];
+ 	}
+ 
+ 	/* compute total space needed */
+ 	len = SizeOfMinMaxTuple;
+ 	if (anynulls)
+ 	{
+ 		/*
+ 		 * We need a double-length bitmap on an on-disk minmax index tuple;
+ 		 * the first half stores the "allnulls" bits, the second stores
+ 		 * "hasnulls".
+ 		 */
+ 		len += BITMAPLEN(mmdesc->md_tupdesc->natts * 2);
+ 	}
+ 
+ 	len = hoff = MAXALIGN(len);
+ 
+ 	data_len = heap_compute_data_size(mmtuple_disk_tupdesc(mmdesc),
+ 									  values, nulls);
+ 
+ 	len += data_len;
+ 
+ 	rettuple = palloc0(len);
+ 	rettuple->mt_blkno = blkno;
+ 	rettuple->mt_info = hoff;
+ 	Assert((rettuple->mt_info & MMIDX_OFFSET_MASK) == hoff);
+ 
+ 	/*
+ 	 * The infomask and null bitmap as computed by heap_fill_tuple are useless
+ 	 * to us.  However, that function will not accept a null infomask; and we
+ 	 * need to pass a valid null bitmap so that it will correctly skip
+ 	 * outputting null attributes in the data area.
+ 	 */
+ 	heap_fill_tuple(mmtuple_disk_tupdesc(mmdesc),
+ 					values,
+ 					nulls,
+ 					(char *) rettuple + hoff,
+ 					data_len,
+ 					&phony_infomask,
+ 					phony_nullbitmap);
+ 
+ 	/* done with these */
+ 	pfree(values);
+ 	pfree(nulls);
+ 	pfree(phony_nullbitmap);
+ 
+ 	/*
+ 	 * Now fill in the real null bitmasks.  allnulls first.
+ 	 */
+ 	if (anynulls)
+ 	{
+ 		bits8	   *bitP;
+ 		int			bitmask;
+ 
+ 		rettuple->mt_info |= MMIDX_NULLS_MASK;
+ 
+ 		/*
+ 		 * Note that we reverse the sense of null bits in this module: we store
+ 		 * a 1 for a null attribute rather than a 0.  So we must reverse the
+ 		 * sense of the att_isnull test in mm_deconstruct_tuple as well.
+ 		 */
+ 		bitP = ((bits8 *) ((char *) rettuple + SizeOfMinMaxTuple)) - 1;
+ 		bitmask = HIGHBIT;
+ 		for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 		{
+ 			if (bitmask != HIGHBIT)
+ 				bitmask <<= 1;
+ 			else
+ 			{
+ 				bitP += 1;
+ 				*bitP = 0x0;
+ 				bitmask = 1;
+ 			}
+ 
+ 			if (!tuple->dt_columns[keyno].allnulls)
+ 				continue;
+ 
+ 			*bitP |= bitmask;
+ 		}
+ 		/* hasnulls bits follow, continuing in the same bitmap */
+ 		for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 		{
+ 			if (bitmask != HIGHBIT)
+ 				bitmask <<= 1;
+ 			else
+ 			{
+ 				bitP += 1;
+ 				*bitP = 0x0;
+ 				bitmask = 1;
+ 			}
+ 
+ 			if (!tuple->dt_columns[keyno].hasnulls)
+ 				continue;
+ 
+ 			*bitP |= bitmask;
+ 		}
+ 		/*
+ 		 * (A stray dead store to bitP previously stood here; it has been
+ 		 * removed.  Its pointer arithmetic was also wrong: it added
+ 		 * SizeOfMinMaxTuple to "rettuple" without first casting to char *,
+ 		 * thereby scaling the offset by sizeof(MMTuple).)
+ 		 */
+ 	}
+ 
+ 	*size = len;
+ 	return rettuple;
+ }
+ 
+ /*
+  * Free a tuple created by minmax_form_tuple.  The tuple is a single
+  * palloc'd chunk, so a plain pfree suffices.
+  */
+ void
+ minmax_free_tuple(MMTuple *tuple)
+ {
+ 	pfree(tuple);
+ }
+ 
+ /*
+  * Return a palloc'd copy of "tuple", which is "len" bytes long.
+  */
+ MMTuple *
+ minmax_copy_tuple(MMTuple *tuple, Size len)
+ {
+ 	MMTuple    *copy = palloc(len);
+ 
+ 	memcpy(copy, tuple, len);
+ 	return copy;
+ }
+ 
+ /*
+  * Report whether two on-disk minmax tuples are binary-identical.
+  */
+ bool
+ minmax_tuples_equal(const MMTuple *a, Size alen, const MMTuple *b, Size blen)
+ {
+ 	return alen == blen && memcmp(a, b, alen) == 0;
+ }
+ 
+ /*
+  * Allocate a new DeformedMMTuple, initialized to the empty ("all nulls")
+  * state.  The header and the per-column Datum arrays come from one single
+  * palloc'd chunk; each column's "values" pointer is aimed at its slice of
+  * the trailing datum area.
+  */
+ DeformedMMTuple *
+ minmax_new_dtuple(MinmaxDesc *mmdesc)
+ {
+ 	DeformedMMTuple *dtup;
+ 	int		natts = mmdesc->md_tupdesc->natts;
+ 	long	hdrsize;
+ 	char   *datumarea;
+ 	int		keyno;
+ 
+ 	hdrsize = MAXALIGN(sizeof(DeformedMMTuple) + sizeof(MMValues) * natts);
+ 	dtup = palloc0(hdrsize + sizeof(Datum) * mmdesc->md_totalstored);
+ 
+ 	datumarea = (char *) dtup + hdrsize;
+ 	for (keyno = 0; keyno < natts; keyno++)
+ 	{
+ 		dtup->dt_columns[keyno].allnulls = true;
+ 		dtup->dt_columns[keyno].hasnulls = false;
+ 		dtup->dt_columns[keyno].values = (Datum *) datumarea;
+ 		datumarea += sizeof(Datum) * mmdesc->md_info[keyno]->oi_nstored;
+ 	}
+ 
+ 	return dtup;
+ }
+ 
+ /*
+  * Reset a DeformedMMTuple to its initial (empty, "all nulls") state.
+  */
+ void
+ minmax_dtuple_initialize(DeformedMMTuple *dtuple, MinmaxDesc *mmdesc)
+ {
+ 	int		keyno;
+ 
+ 	for (keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 	{
+ 		/*
+ 		 * FIXME -- we may need to pfree() some datums here before clobbering
+ 		 * the whole thing
+ 		 */
+ 		dtuple->dt_columns[keyno].allnulls = true;
+ 		dtuple->dt_columns[keyno].hasnulls = false;
+ 		memset(dtuple->dt_columns[keyno].values, 0,
+ 			   sizeof(Datum) * mmdesc->md_info[keyno]->oi_nstored);
+ 	}
+ }
+ 
+ /*
+  * Convert a MMTuple back to a DeformedMMTuple.  This is the reverse of
+  * minmax_form_tuple.
+  *
+  * Note we don't need the "on disk tupdesc" here; we rely on our own routine
+  * to deconstruct the tuple from the on-disk format.
+  *
+  * XXX some callers might need copies of each datum; if so we need to apply
+  * datumCopy inside the loop.  We probably also need a minmax_free_dtuple()
+  * function.
+  */
+ DeformedMMTuple *
+ minmax_deform_tuple(MinmaxDesc *mmdesc, MMTuple *tuple)
+ {
+ 	DeformedMMTuple *dtup;
+ 	Datum	   *values;
+ 	bool	   *allnulls;
+ 	bool	   *hasnulls;
+ 	char	   *tp;
+ 	bits8	   *nullbits;
+ 	int			keyno;
+ 	int			valueno;
+ 
+ 	dtup = minmax_new_dtuple(mmdesc);
+ 
+ 	/* scratch space for the deconstructed values and null flags */
+ 	values = palloc(sizeof(Datum) * mmdesc->md_totalstored);
+ 	allnulls = palloc(sizeof(bool) * mmdesc->md_tupdesc->natts);
+ 	hasnulls = palloc(sizeof(bool) * mmdesc->md_tupdesc->natts);
+ 
+ 	tp = (char *) tuple + MMTupleDataOffset(tuple);
+ 
+ 	if (MMTupleHasNulls(tuple))
+ 		nullbits = (bits8 *) ((char *) tuple + SizeOfMinMaxTuple);
+ 	else
+ 		nullbits = NULL;
+ 	mm_deconstruct_tuple(mmdesc,
+ 						 tp, nullbits, MMTupleHasNulls(tuple),
+ 						 values, allnulls, hasnulls);
+ 
+ 	/*
+ 	 * Iterate to assign each of the values to the corresponding item in the
+ 	 * values array of each column.  Copy straight into the per-column arrays
+ 	 * that minmax_new_dtuple already allocated (and sized correctly), rather
+ 	 * than leaking them by overwriting the pointers with fresh, oversized
+ 	 * allocations.
+ 	 */
+ 	for (valueno = 0, keyno = 0; keyno < mmdesc->md_tupdesc->natts; keyno++)
+ 	{
+ 		int		i;
+ 
+ 		if (allnulls[keyno])
+ 		{
+ 			valueno += mmdesc->md_info[keyno]->oi_nstored;
+ 			continue;
+ 		}
+ 
+ 		/* XXX optional datumCopy()? */
+ 		for (i = 0; i < mmdesc->md_info[keyno]->oi_nstored; i++)
+ 			dtup->dt_columns[keyno].values[i] = values[valueno++];
+ 
+ 		dtup->dt_columns[keyno].hasnulls = hasnulls[keyno];
+ 		dtup->dt_columns[keyno].allnulls = false;
+ 	}
+ 
+ 	pfree(values);
+ 	pfree(allnulls);
+ 	pfree(hasnulls);
+ 
+ 	return dtup;
+ }
+ 
+ /*
+  * mm_deconstruct_tuple
+  *		Guts of attribute extraction from an on-disk minmax tuple.
+  *
+  * Its arguments are:
+  *	mmdesc		minmax descriptor for the stored tuple
+  *	tp			pointer to the tuple data area
+  *	nullbits	pointer to the tuple nulls bitmask
+  *	nulls		"has nulls" bit in tuple infomask
+  *	values		output values, array of size mmdesc->md_totalstored
+  *	allnulls	output "allnulls", size mmdesc->md_tupdesc->natts
+  *	hasnulls	output "hasnulls", size mmdesc->md_tupdesc->natts
+  *
+  * Output arrays must have been allocated by caller.  Note that for columns
+  * whose "allnulls" output is true, the corresponding entries of "values"
+  * are skipped, i.e. left unassigned.
+  */
+ static inline void
+ mm_deconstruct_tuple(MinmaxDesc *mmdesc,
+ 					 char *tp, bits8 *nullbits, bool nulls,
+ 					 Datum *values, bool *allnulls, bool *hasnulls)
+ {
+ 	int			attnum;
+ 	int			stored;
+ 	TupleDesc	diskdsc;
+ 	long		off;
+ 
+ 	/*
+ 	 * First iterate to natts to obtain both null flags for each attribute.
+ 	 * Note that we reverse the sense of the att_isnull test, because we store
+ 	 * 1 for a null value (rather than a 1 for a not null value as is the
+ 	 * att_isnull convention used elsewhere.)  See minmax_form_tuple.
+ 	 */
+ 	for (attnum = 0; attnum < mmdesc->md_tupdesc->natts; attnum++)
+ 	{
+ 		/*
+ 		 * the "all nulls" bit means that all values in the page range for
+ 		 * this column are nulls.  Therefore there are no values in the tuple
+ 		 * data area.
+ 		 */
+ 		allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
+ 
+ 		/*
+ 		 * the "has nulls" bit means that some tuples have nulls, but others
+ 		 * have not-null values.  Therefore we know the tuple contains data for
+ 		 * this column.
+ 		 *
+ 		 * The hasnulls bits follow the allnulls bits in the same bitmask.
+ 		 */
+ 		hasnulls[attnum] =
+ 			nulls && !att_isnull(mmdesc->md_tupdesc->natts + attnum, nullbits);
+ 	}
+ 
+ 	/*
+ 	 * Iterate to obtain each attribute's stored values.  Note that since we
+ 	 * may reuse attribute entries for more than one column, we cannot cache
+ 	 * offsets here.
+ 	 */
+ 	diskdsc = mmtuple_disk_tupdesc(mmdesc);
+ 	stored = 0;
+ 	off = 0;
+ 	for (attnum = 0; attnum < mmdesc->md_tupdesc->natts; attnum++)
+ 	{
+ 		int		datumno;
+ 
+ 		/* all-nulls columns stored no data: skip their values[] slots */
+ 		if (allnulls[attnum])
+ 		{
+ 			stored += mmdesc->md_info[attnum]->oi_nstored;
+ 			continue;
+ 		}
+ 
+ 		for (datumno = 0;
+ 			 datumno < mmdesc->md_info[attnum]->oi_nstored;
+ 			 datumno++)
+ 		{
+ 			Form_pg_attribute thisatt = diskdsc->attrs[stored];
+ 
+ 			if (thisatt->attlen == -1)
+ 			{
+ 				off = att_align_pointer(off, thisatt->attalign, -1,
+ 										tp + off);
+ 			}
+ 			else
+ 			{
+ 				/* not varlena, so safe to use att_align_nominal */
+ 				off = att_align_nominal(off, thisatt->attalign);
+ 			}
+ 
+ 			values[stored++] = fetchatt(thisatt, tp + off);
+ 
+ 			off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+ 		}
+ 	}
+ }
*** /dev/null
--- b/src/backend/access/minmax/mmxlog.c
***************
*** 0 ****
--- 1,323 ----
+ /*
+  * mmxlog.c
+  *		XLog replay routines for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmxlog.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax.h"
+ #include "access/minmax_internal.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
+ #include "access/minmax_xlog.h"
+ #include "access/xlogutils.h"
+ #include "storage/freespace.h"
+ 
+ 
+ /*
+  * xlog replay routines
+  */
+ 
+ /*
+  * Replay the creation of a minmax index: initialize the index metapage and
+  * the first revmap page (block 1), which are the two pages set up when the
+  * index is first created.
+  */
+ static void
+ minmax_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_createidx *xlrec = (xl_minmax_createidx *) XLogRecGetData(record);
+ 	Buffer		buf;
+ 	Page		page;
+ 
+ 	/* Backup blocks are not used in create_index records */
+ 	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ 
+ 	/* create the index's metapage */
+ 	buf = XLogReadBuffer(xlrec->node, MINMAX_METAPAGE_BLKNO, true);
+ 	Assert(BufferIsValid(buf));
+ 	page = (Page) BufferGetPage(buf);
+ 	mm_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buf);
+ 	UnlockReleaseBuffer(buf);
+ 
+ 	/* also initialize its first revmap page */
+ 	buf = XLogReadBuffer(xlrec->node, 1, true);
+ 	Assert(BufferIsValid(buf));
+ 	page = (Page) BufferGetPage(buf);
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buf);
+ 	UnlockReleaseBuffer(buf);
+ }
+ 
+ /*
+  * Common part of an insert or update. Inserts the new tuple and updates the
+  * revmap.
+  *
+  * Backup block 0, if present, is the regular page that receives the new
+  * tuple; backup block 1 is the revmap page that points to the heap block.
+  */
+ static void
+ minmax_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record, xl_minmax_insert *xlrec,
+ 				   MMTuple *mmtuple, int tuplen)
+ {
+ 	BlockNumber	blkno;
+ 	Buffer		buffer;
+ 	Page		page;
+ 	OffsetNumber offnum;
+ 
+ 	/* If we have a full-page image, restore it */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 	}
+ 	else
+ 	{
+ 		/* the tuple in the record must match the range the record is for */
+ 		Assert(mmtuple->mt_blkno == xlrec->heapBlk);
+ 
+ 		blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
+ 		if (record->xl_info & XLOG_MINMAX_INIT_PAGE)
+ 		{
+ 			/* the page was empty at insertion time: re-initialize it */
+ 			buffer = XLogReadBuffer(xlrec->node, blkno, true);
+ 			Assert(BufferIsValid(buffer));
+ 			page = (Page) BufferGetPage(buffer);
+ 
+ 			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
+ 		}
+ 		else
+ 		{
+ 			buffer = XLogReadBuffer(xlrec->node, blkno, false);
+ 		}
+ 		/* buffer can only be invalid in the non-init branch */
+ 		if (BufferIsValid(buffer))
+ 		{
+ 			page = (Page) BufferGetPage(buffer);
+ 
+ 			/* standard LSN interlock: skip if the change is already applied */
+ 			if (lsn > PageGetLSN(page))
+ 			{
+ 				/* re-insert at the exact offset recorded in the WAL record */
+ 				offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ 				if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ 					elog(PANIC, "minmax_xlog_insert: invalid max offset number");
+ 
+ 				offnum = PageAddItem(page, (Item) mmtuple, tuplen, offnum, true, false);
+ 				if (offnum == InvalidOffsetNumber)
+ 					elog(PANIC, "minmax_xlog_insert: failed to add tuple");
+ 
+ 				PageSetLSN(page, lsn);
+ 				MarkBufferDirty(buffer);
+ 			}
+ 			UnlockReleaseBuffer(buffer);
+ 		}
+ 	}
+ 
+ 	/* update the revmap */
+ 	if (record->xl_info & XLR_BKP_BLOCK(1))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 1, false, false);
+ 	}
+ 	else
+ 	{
+ 		buffer = XLogReadBuffer(xlrec->node, xlrec->revmapBlk, false);
+ 		if (BufferIsValid(buffer))
+ 		{
+ 			page = (Page) BufferGetPage(buffer);
+ 
+ 			/* same LSN interlock for the revmap page */
+ 			if (lsn > PageGetLSN(page))
+ 			{
+ 				mmSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, xlrec->tid);
+ 				PageSetLSN(page, lsn);
+ 				MarkBufferDirty(buffer);
+ 			}
+ 			UnlockReleaseBuffer(buffer);
+ 		}
+ 	}
+ 
+ 	/* XXX no FSM updates here ... */
+ }
+ 
+ /*
+  * Replay a minmax tuple insertion.  The tuple image follows the fixed-size
+  * xl_minmax_insert struct in the record's data area.
+  */
+ static void
+ minmax_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_insert *xlrec = (xl_minmax_insert *) XLogRecGetData(record);
+ 	MMTuple	   *newtup;
+ 	int			tuplen;
+ 
+ 	/* the tuple is the remainder of the record after the fixed header */
+ 	tuplen = record->xl_len - SizeOfMinmaxInsert;
+ 	newtup = (MMTuple *) ((char *) xlrec + SizeOfMinmaxInsert);
+ 
+ 	minmax_xlog_insert_update(lsn, record, xlrec, newtup, tuplen);
+ }
+ 
+ /*
+  * Replay an update of a minmax tuple: first insert the new version and
+  * update the revmap (exactly as for an insertion), then remove the old
+  * tuple.  Backup block 2, if present, covers the page holding the old
+  * tuple.
+  */
+ static void
+ minmax_xlog_update(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_update *xlrec = (xl_minmax_update *) XLogRecGetData(record);
+ 	BlockNumber blkno;
+ 	OffsetNumber offnum;
+ 	Buffer		buffer;
+ 	Page		page;
+ 	MMTuple	   *newtup;
+ 	int			tuplen;
+ 
+ 	/* the new tuple follows the fixed-size record header */
+ 	tuplen = record->xl_len - SizeOfMinmaxUpdate;
+ 	newtup = (MMTuple *) ((char *) xlrec + SizeOfMinmaxUpdate);
+ 
+ 	/* First insert the new tuple and update revmap, like in an insertion. */
+ 	minmax_xlog_insert_update(lsn, record, &xlrec->new, newtup, tuplen);
+ 
+ 	/* Then remove the old tuple */
+ 	if (record->xl_info & XLR_BKP_BLOCK(2))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 2, false, false);
+ 	}
+ 	else
+ 	{
+ 		blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
+ 		buffer = XLogReadBuffer(xlrec->new.node, blkno, false);
+ 		if (BufferIsValid(buffer))
+ 		{
+ 			page = (Page) BufferGetPage(buffer);
+ 
+ 			/* standard LSN interlock: skip if the change is already applied */
+ 			if (lsn > PageGetLSN(page))
+ 			{
+ 				offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
+ 				/* message fixed: previously copy-pasted "minmax_xlog_insert" */
+ 				if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ 					elog(PANIC, "minmax_xlog_update: invalid max offset number");
+ 
+ 				PageIndexDeleteNoCompact(page, &offnum, 1);
+ 
+ 				PageSetLSN(page, lsn);
+ 				MarkBufferDirty(buffer);
+ 			}
+ 			UnlockReleaseBuffer(buffer);
+ 		}
+ 	}
+ }
+ 
+ /*
+  * Update a tuple on a single page: the old version at the recorded TID is
+  * deleted and the new version is re-inserted at the same offset number.
+  */
+ static void
+ minmax_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_samepage_update *xlrec = (xl_minmax_samepage_update *) XLogRecGetData(record);
+ 	BlockNumber	blkno;
+ 	Buffer		buffer;
+ 	Page		page;
+ 	OffsetNumber offnum;
+ 
+ 	/* If we have a full-page image, restore it */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		(void) RestoreBackupBlock(lsn, record, 0, false, false);
+ 	}
+ 	else
+ 	{
+ 		MMTuple	   *mmtuple;
+ 		int			tuplen;
+ 
+ 		/* the replacement tuple follows the fixed-size record header */
+ 		tuplen = record->xl_len - SizeOfMinmaxSamepageUpdate;
+ 		mmtuple = (MMTuple *) ((char *) xlrec + SizeOfMinmaxSamepageUpdate);
+ 
+ 		blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
+ 		buffer = XLogReadBuffer(xlrec->node, blkno, false);
+ 		if (BufferIsValid(buffer))
+ 		{
+ 			page = (Page) BufferGetPage(buffer);
+ 
+ 			/* standard LSN interlock: skip if the change is already applied */
+ 			if (lsn > PageGetLSN(page))
+ 			{
+ 				offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
+ 				if (PageGetMaxOffsetNumber(page) + 1 < offnum)
+ 					elog(PANIC, "minmax_xlog_samepage_update: invalid max offset number");
+ 
+ 				/* delete the old copy, then re-add at the same offset */
+ 				PageIndexDeleteNoCompact(page, &offnum, 1);
+ 				offnum = PageAddItem(page, (Item) mmtuple, tuplen, offnum, true, false);
+ 				if (offnum == InvalidOffsetNumber)
+ 					elog(PANIC, "minmax_xlog_samepage_update: failed to add tuple");
+ 
+ 				PageSetLSN(page, lsn);
+ 				MarkBufferDirty(buffer);
+ 			}
+ 			UnlockReleaseBuffer(buffer);
+ 		}
+ 	}
+ 
+ 	/* XXX no FSM updates here ... */
+ }
+ 
+ 
+ /*
+  * Replay an extension of the revmap: advance lastRevmapPage in the
+  * metapage, and re-initialize the target block as an empty revmap page.
+  */
+ static void
+ minmax_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	xl_minmax_revmap_extend *xlrec = (xl_minmax_revmap_extend *) XLogRecGetData(record);
+ 	Buffer	metabuf;
+ 	Page	metapg;
+ 	MinmaxMetaPageData *metadata;
+ 	Buffer	buf;
+ 	Page	page;
+ 
+ 	/* Update the metapage */
+ 	if (record->xl_info & XLR_BKP_BLOCK(0))
+ 	{
+ 		metabuf = RestoreBackupBlock(lsn, record, 0, false, true);
+ 	}
+ 	else
+ 	{
+ 		metabuf = XLogReadBuffer(xlrec->node, MINMAX_METAPAGE_BLKNO, false);
+ 		if (BufferIsValid(metabuf))
+ 		{
+ 			metapg = BufferGetPage(metabuf);
+ 			/* standard LSN interlock: skip if the change is already applied */
+ 			if (lsn > PageGetLSN(metapg))
+ 			{
+ 				metadata = (MinmaxMetaPageData *) PageGetContents(metapg);
+ 
+ 				Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1);
+ 				metadata->lastRevmapPage = xlrec->targetBlk;
+ 
+ 				PageSetLSN(metapg, lsn);
+ 				MarkBufferDirty(metabuf);
+ 			}
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * Re-init the target block as a revmap page.  There's never a full-
+ 	 * page image here.
+ 	 */
+ 
+ 	buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
+ 	page = (Page) BufferGetPage(buf);
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 
+ 	PageSetLSN(page, lsn);
+ 	MarkBufferDirty(buf);
+ 
+ 	UnlockReleaseBuffer(buf);
+ 
+ 	/*
+ 	 * XLogReadBuffer(..., false) above can return InvalidBuffer, in which
+ 	 * case we must not try to release the metapage buffer.  (The update
+ 	 * path already checks BufferIsValid before using it.)
+ 	 */
+ 	if (BufferIsValid(metabuf))
+ 		UnlockReleaseBuffer(metabuf);
+ }
+ 
+ /*
+  * Redo entry point for the minmax resource manager: dispatch to the
+  * per-record replay routine based on the opcode bits of xl_info.
+  */
+ void
+ minmax_redo(XLogRecPtr lsn, XLogRecord *record)
+ {
+ 	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+ 
+ 	switch (info & XLOG_MINMAX_OPMASK)
+ 	{
+ 		case XLOG_MINMAX_CREATE_INDEX:
+ 			minmax_xlog_createidx(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_INSERT:
+ 			minmax_xlog_insert(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_UPDATE:
+ 			minmax_xlog_update(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_SAMEPAGE_UPDATE:
+ 			minmax_xlog_samepage_update(lsn, record);
+ 			break;
+ 		case XLOG_MINMAX_REVMAP_EXTEND:
+ 			minmax_xlog_revmap_extend(lsn, record);
+ 			break;
+ 		default:
+ 			elog(PANIC, "minmax_redo: unknown op code %u", info);
+ 	}
+ }
*** a/src/backend/access/rmgrdesc/Makefile
--- b/src/backend/access/rmgrdesc/Makefile
***************
*** 9,15 **** top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
  OBJS = clogdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \
! 	   mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o smgrdesc.o spgdesc.o \
  	   standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 9,16 ----
  include $(top_builddir)/src/Makefile.global
  
  OBJS = clogdesc.o dbasedesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \
! 	   minmaxdesc.o mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o \
! 	   smgrdesc.o spgdesc.o \
  	   standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
  
  include $(top_srcdir)/src/backend/common.mk
*** /dev/null
--- b/src/backend/access/rmgrdesc/minmaxdesc.c
***************
*** 0 ****
--- 1,89 ----
+ /*-------------------------------------------------------------------------
+  *
+  * minmaxdesc.c
+  *	  rmgr descriptor routines for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/rmgrdesc/minmaxdesc.c
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax_xlog.h"
+ 
+ /*
+  * Produce a one-line, human-readable description of a minmax WAL record,
+  * appended to 'buf'.  Used by WAL inspection/debugging tooling.
+  */
+ void
+ minmax_desc(StringInfo buf, XLogRecord *record)
+ {
+ 	char	   *rec = XLogRecGetData(record);
+ 	uint8		info = record->xl_info & ~XLR_INFO_MASK;
+ 
+ 	/* strip flag bits, keeping only the opcode */
+ 	info &= XLOG_MINMAX_OPMASK;
+ 	if (info == XLOG_MINMAX_CREATE_INDEX)
+ 	{
+ 		xl_minmax_createidx *xlrec = (xl_minmax_createidx *) rec;
+ 
+ 		appendStringInfo(buf, "create index: v%d pagesPerRange %u %u/%u/%u",
+ 						 xlrec->version, xlrec->pagesPerRange,
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode);
+ 	}
+ 	else if (info == XLOG_MINMAX_INSERT)
+ 	{
+ 		xl_minmax_insert *xlrec = (xl_minmax_insert *) rec;
+ 
+ 		if (record->xl_info & XLOG_MINMAX_INIT_PAGE)
+ 			appendStringInfo(buf, "insert(init): ");
+ 		else
+ 			appendStringInfo(buf, "insert: ");
+ 		appendStringInfo(buf, "%u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode,
+ 						 xlrec->heapBlk, xlrec->revmapBlk,
+ 						 xlrec->pagesPerRange,
+ 						 ItemPointerGetBlockNumber(&xlrec->tid),
+ 						 ItemPointerGetOffsetNumber(&xlrec->tid));
+ 	}
+ 	else if (info == XLOG_MINMAX_UPDATE)
+ 	{
+ 		xl_minmax_update *xlrec = (xl_minmax_update *) rec;
+ 
+ 		if (record->xl_info & XLOG_MINMAX_INIT_PAGE)
+ 			appendStringInfo(buf, "update(init): ");
+ 		else
+ 			appendStringInfo(buf, "update: ");
+ 		appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
+ 						 xlrec->new.node.spcNode, xlrec->new.node.dbNode,
+ 						 xlrec->new.node.relNode,
+ 						 xlrec->new.heapBlk, xlrec->new.revmapBlk,
+ 						 xlrec->new.pagesPerRange,
+ 						 ItemPointerGetBlockNumber(&xlrec->oldtid),
+ 						 ItemPointerGetOffsetNumber(&xlrec->oldtid),
+ 						 ItemPointerGetBlockNumber(&xlrec->new.tid),
+ 						 ItemPointerGetOffsetNumber(&xlrec->new.tid));
+ 	}
+ 	else if (info == XLOG_MINMAX_SAMEPAGE_UPDATE)
+ 	{
+ 		xl_minmax_samepage_update *xlrec = (xl_minmax_samepage_update *) rec;
+ 
+ 		appendStringInfo(buf, "samepage_update: rel %u/%u/%u TID (%u,%u)",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode,
+ 						 ItemPointerGetBlockNumber(&xlrec->tid),
+ 						 ItemPointerGetOffsetNumber(&xlrec->tid));
+ 	}
+ 	else if (info == XLOG_MINMAX_REVMAP_EXTEND)
+ 	{
+ 		xl_minmax_revmap_extend *xlrec = (xl_minmax_revmap_extend *) rec;
+ 
+ 		appendStringInfo(buf, "revmap extend: rel %u/%u/%u targetBlk %u",
+ 						 xlrec->node.spcNode, xlrec->node.dbNode,
+ 						 xlrec->node.relNode, xlrec->targetBlk);
+ 	}
+ 	else
+ 		appendStringInfo(buf, "UNKNOWN");
+ }
*** a/src/backend/access/transam/rmgr.c
--- b/src/backend/access/transam/rmgr.c
***************
*** 12,17 ****
--- 12,18 ----
  #include "access/gist_private.h"
  #include "access/hash.h"
  #include "access/heapam_xlog.h"
+ #include "access/minmax_xlog.h"
  #include "access/multixact.h"
  #include "access/nbtree.h"
  #include "access/spgist.h"
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 2096,2101 **** IndexBuildHeapScan(Relation heapRelation,
--- 2096,2122 ----
  				   IndexBuildCallback callback,
  				   void *callback_state)
  {
+ 	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
+ 								   indexInfo, allow_sync,
+ 								   0, InvalidBlockNumber,
+ 								   callback, callback_state);
+ }
+ 
+ /*
+  * As above, except that instead of scanning the complete heap, only the given
+  * number of blocks are scanned.  Scan to end-of-rel can be signalled by
+  * passing InvalidBlockNumber as numblocks.
+  */
+ double
+ IndexBuildHeapRangeScan(Relation heapRelation,
+ 						Relation indexRelation,
+ 						IndexInfo *indexInfo,
+ 						bool allow_sync,
+ 						BlockNumber start_blockno,
+ 						BlockNumber numblocks,
+ 						IndexBuildCallback callback,
+ 						void *callback_state)
+ {
  	bool		is_system_catalog;
  	bool		checking_uniqueness;
  	HeapScanDesc scan;
***************
*** 2166,2171 **** IndexBuildHeapScan(Relation heapRelation,
--- 2187,2195 ----
  								true,	/* buffer access strategy OK */
  								allow_sync);	/* syncscan OK? */
  
+ 	/* set our endpoints */
+ 	heap_setscanlimits(scan, start_blockno, numblocks);
+ 
  	reltuples = 0;
  
  	/*
*** a/src/backend/replication/logical/decode.c
--- b/src/backend/replication/logical/decode.c
***************
*** 132,137 **** LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogRecord *record)
--- 132,138 ----
  		case RM_GIST_ID:
  		case RM_SEQ_ID:
  		case RM_SPGIST_ID:
+ 		case RM_MINMAX_ID:
  			break;
  		case RM_NEXT_ID:
  			elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) buf.record.xl_rmid);
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 399,405 **** PageRestoreTempPage(Page tempPage, Page oldPage)
  }
  
  /*
!  * sorting support for PageRepairFragmentation and PageIndexMultiDelete
   */
  typedef struct itemIdSortData
  {
--- 399,406 ----
  }
  
  /*
!  * sorting support for PageRepairFragmentation, PageIndexMultiDelete,
!  * PageIndexDeleteNoCompact
   */
  typedef struct itemIdSortData
  {
***************
*** 896,901 **** PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
--- 897,1078 ----
  	phdr->pd_upper = upper;
  }
  
+ /*
+  * PageIndexDeleteNoCompact
+  *		Delete the given items for an index page, and defragment the resulting
+  *		free space, but do not compact the item pointers array.
+  *
+  * itemnos is the array of item numbers to delete, which must be sorted in
+  * ascending order; nitems is its size.
+  *
+  * NOTE(review): interspersed unused line pointers are deliberately kept so
+  * that offset numbers of surviving tuples never change; as written, pd_lower
+  * retains room for all nline entries (trailing unused entries do not appear
+  * to be truncated) -- confirm whether that is intended.
+  *
+  * This is used for index AMs that require that existing TIDs of live tuples
+  * remain unchanged.
+  */
+ void
+ PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos, int nitems)
+ {
+ 	PageHeader	phdr = (PageHeader) page;
+ 	LocationIndex pd_lower = phdr->pd_lower;
+ 	LocationIndex pd_upper = phdr->pd_upper;
+ 	LocationIndex pd_special = phdr->pd_special;
+ 	int			nline;
+ 	bool		empty;
+ 	OffsetNumber offnum;
+ 	int			nextitm;
+ 
+ 	/*
+ 	 * As with PageRepairFragmentation, paranoia seems justified.
+ 	 */
+ 	if (pd_lower < SizeOfPageHeaderData ||
+ 		pd_lower > pd_upper ||
+ 		pd_upper > pd_special ||
+ 		pd_special > BLCKSZ ||
+ 		pd_special != MAXALIGN(pd_special))
+ 		ereport(ERROR,
+ 				(errcode(ERRCODE_DATA_CORRUPTED),
+ 				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
+ 						pd_lower, pd_upper, pd_special)))
+ 
+ 	/*
+ 	 * Scan the existing item pointer array and mark as unused those that are
+ 	 * in our kill-list; make sure any non-interesting ones are marked unused
+ 	 * as well.
+ 	 */
+ 	nline = PageGetMaxOffsetNumber(page);
+ 	empty = true;
+ 	nextitm = 0;
+ 	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
+ 	{
+ 		ItemId		lp;
+ 		ItemLength	itemlen;
+ 		ItemOffset	offset;
+ 
+ 		lp = PageGetItemId(page, offnum);
+ 
+ 		itemlen = ItemIdGetLength(lp);
+ 		offset = ItemIdGetOffset(lp);
+ 
+ 		if (ItemIdIsUsed(lp))
+ 		{
+ 			if (offset < pd_upper ||
+ 				(offset + itemlen) > pd_special ||
+ 				offset != MAXALIGN(offset))
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_DATA_CORRUPTED),
+ 						 errmsg("corrupted item pointer: offset = %u, length = %u",
+ 								offset, (unsigned int) itemlen)));
+ 
+ 			if (nextitm < nitems && offnum == itemnos[nextitm])
+ 			{
+ 				/* this one is on our list to delete, so mark it unused */
+ 				ItemIdSetUnused(lp);
+ 				nextitm++;
+ 			}
+ 			else if (ItemIdHasStorage(lp))
+ 			{
+ 				/* This one's live -- must do the compaction dance */
+ 				empty = false;
+ 			}
+ 			else
+ 			{
+ 				/* get rid of this one too */
+ 				ItemIdSetUnused(lp);
+ 			}
+ 		}
+ 	}
+ 
+ 	/* this will catch invalid or out-of-order itemnos[] */
+ 	if (nextitm != nitems)
+ 		elog(ERROR, "incorrect index offsets supplied");
+ 
+ 	if (empty)
+ 	{
+ 		/* Page is completely empty, so just reset it quickly */
+ 		phdr->pd_lower = SizeOfPageHeaderData;
+ 		phdr->pd_upper = pd_special;
+ 	}
+ 	else
+ 	{
+ 		/* There are live items: need to compact the page the hard way */
+ 		itemIdSortData itemidbase[MaxOffsetNumber];
+ 		itemIdSort	itemidptr;
+ 		int			i;
+ 		Size		totallen;
+ 		Offset		upper;
+ 
+ 		/*
+ 		 * Scan the page taking note of each item that we need to preserve.
+ 		 * This includes both live items (those that contain data) and
+ 		 * interspersed unused ones.  It's critical to preserve these unused
+ 		 * items, because otherwise the offset numbers for later live items
+ 		 * would change, which is not acceptable.  Unused items might get used
+ 		 * again later; that is fine.
+ 		 */
+ 		itemidptr = itemidbase;
+ 		totallen = 0;
+ 		for (i = 0; i < nline; i++, itemidptr++)
+ 		{
+ 			ItemId		lp;
+ 
+ 			itemidptr->offsetindex = i;
+ 
+ 			lp = PageGetItemId(page, i + 1);
+ 			if (ItemIdHasStorage(lp))
+ 			{
+ 				itemidptr->itemoff = ItemIdGetOffset(lp);
+ 				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
+ 				totallen += itemidptr->alignedlen;
+ 			}
+ 			else
+ 			{
+ 				/* unused entry: zero length so it consumes no data space */
+ 				itemidptr->itemoff = 0;
+ 				itemidptr->alignedlen = 0;
+ 			}
+ 		}
+ 		/* By here, there are exactly nline elements in itemidbase array */
+ 
+ 		if (totallen > (Size) (pd_special - pd_lower))
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_DATA_CORRUPTED),
+ 					 errmsg("corrupted item lengths: total %u, available space %u",
+ 							(unsigned int) totallen, pd_special - pd_lower)));
+ 
+ 		/* sort itemIdSortData array into decreasing itemoff order */
+ 		qsort((char *) itemidbase, nline, sizeof(itemIdSortData),
+ 			  itemoffcompare);
+ 
+ 		/*
+ 		 * Defragment the data areas of each tuple, being careful to preserve
+ 		 * each item's position in the linp array.
+ 		 */
+ 		upper = pd_special;
+ 		PageClearHasFreeLinePointers(page);
+ 		for (i = 0, itemidptr = itemidbase; i < nline; i++, itemidptr++)
+ 		{
+ 			ItemId		lp;
+ 
+ 			lp = PageGetItemId(page, itemidptr->offsetindex + 1);
+ 			if (itemidptr->alignedlen == 0)
+ 			{
+ 				/* unused slot survives in place; advertise it as reusable */
+ 				PageSetHasFreeLinePointers(page);
+ 				ItemIdSetUnused(lp);
+ 				continue;
+ 			}
+ 			upper -= itemidptr->alignedlen;
+ 			memmove((char *) page + upper,
+ 					(char *) page + itemidptr->itemoff,
+ 					itemidptr->alignedlen);
+ 			lp->lp_off = upper;
+ 			/* lp_flags and lp_len remain the same as originally */
+ 		}
+ 
+ 		/* Set the new page limits */
+ 		phdr->pd_upper = upper;
+ 		phdr->pd_lower = SizeOfPageHeaderData + i * sizeof(ItemIdData);
+ 	}
+ }
  
  /*
   * Set checksum for a page in shared buffers.
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 7349,7351 **** gincostestimate(PG_FUNCTION_ARGS)
--- 7349,7375 ----
  
  	PG_RETURN_VOID();
  }
+ 
+ /*
+  * mmcostestimate
+  *		Cost estimate for a minmax index scan.
+  *
+  * Charges seq_page_cost for every page of the index, entirely as startup
+  * cost (presumably because the whole index is read before any result is
+  * produced -- TODO confirm against the scan implementation).
+  */
+ Datum
+ mmcostestimate(PG_FUNCTION_ARGS)
+ {
+ 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ 	IndexPath  *path = (IndexPath *) PG_GETARG_POINTER(1);
+ 	double		loop_count = PG_GETARG_FLOAT8(2);
+ 	Cost	   *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
+ 	Cost	   *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
+ 	Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
+ 	double	   *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ 	IndexOptInfo *index = path->indexinfo;
+ 
+ 	/* whole-index read cost; total equals startup */
+ 	*indexStartupCost = (Cost) seq_page_cost * index->pages * loop_count;
+ 	*indexTotalCost = *indexStartupCost;
+ 
+ 	/* selectivity of the index quals, evaluated as ordinary clauses */
+ 	*indexSelectivity =
+ 		clauselist_selectivity(root, path->indexquals,
+ 							   path->indexinfo->rel->relid,
+ 							   JOIN_INNER, NULL);
+ 	*indexCorrelation = 1;
+ 
+ 	PG_RETURN_VOID();
+ }
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 112,117 **** extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
--- 112,119 ----
  					 bool allow_strat, bool allow_sync);
  extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
  				  int nkeys, ScanKey key);
+ extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
+ 		   BlockNumber endBlk);
  extern void heap_rescan(HeapScanDesc scan, ScanKey key);
  extern void heap_endscan(HeapScanDesc scan);
  extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
*** /dev/null
--- b/src/include/access/minmax.h
***************
*** 0 ****
--- 1,52 ----
+ /*
+  * AM-callable functions for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax.h
+  */
+ #ifndef MINMAX_H
+ #define MINMAX_H
+ 
+ #include "fmgr.h"
+ #include "nodes/execnodes.h"
+ #include "utils/relcache.h"
+ 
+ 
+ /*
+  * prototypes for functions in minmax.c (external entry points for minmax)
+  */
+ extern Datum mmbuild(PG_FUNCTION_ARGS);
+ extern Datum mmbuildempty(PG_FUNCTION_ARGS);
+ extern Datum mminsert(PG_FUNCTION_ARGS);
+ extern Datum mmbeginscan(PG_FUNCTION_ARGS);
+ extern Datum mmgettuple(PG_FUNCTION_ARGS);
+ extern Datum mmgetbitmap(PG_FUNCTION_ARGS);
+ extern Datum mmrescan(PG_FUNCTION_ARGS);
+ extern Datum mmendscan(PG_FUNCTION_ARGS);
+ extern Datum mmmarkpos(PG_FUNCTION_ARGS);
+ extern Datum mmrestrpos(PG_FUNCTION_ARGS);
+ extern Datum mmbulkdelete(PG_FUNCTION_ARGS);
+ extern Datum mmvacuumcleanup(PG_FUNCTION_ARGS);
+ extern Datum mmcanreturn(PG_FUNCTION_ARGS);
+ extern Datum mmcostestimate(PG_FUNCTION_ARGS);
+ extern Datum mmoptions(PG_FUNCTION_ARGS);
+ 
+ /*
+  * Storage type for MinMax' reloptions
+  */
+ typedef struct MinmaxOptions
+ {
+ 	int32		vl_len_;		/* varlena header (do not touch directly!) */
+ 	BlockNumber	pagesPerRange;
+ } MinmaxOptions;
+ 
+ #define MINMAX_DEFAULT_PAGES_PER_RANGE	128
+ #define MinmaxGetPagesPerRange(relation) \
+ 	((relation)->rd_options ? \
+ 	 ((MinmaxOptions *) (relation)->rd_options)->pagesPerRange : \
+ 	  MINMAX_DEFAULT_PAGES_PER_RANGE)
+ 
+ #endif   /* MINMAX_H */
*** /dev/null
--- b/src/include/access/minmax_internal.h
***************
*** 0 ****
--- 1,86 ----
+ /*
+  * minmax_internal.h
+  *		internal declarations for MinMax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_internal.h
+  */
+ #ifndef MINMAX_INTERNAL_H
+ #define MINMAX_INTERNAL_H
+ 
+ #include "fmgr.h"
+ #include "storage/buf.h"
+ #include "storage/bufpage.h"
+ #include "storage/off.h"
+ #include "utils/relcache.h"
+ 
+ 
+ /*
+  * A MinmaxDesc is a struct designed to enable decoding a MinMax tuple from the
+  * on-disk format to a DeformedMMTuple and vice-versa.
+  */
+ 
+ /* struct returned by "OpcInfo" amproc */
+ typedef struct MinmaxOpcInfo
+ {
+ 	/* Number of columns stored in an index column of this opclass */
+ 	uint16	oi_nstored;
+ 
+ 	/* Opaque pointer for the opclass' private use */
+ 	void   *oi_opaque;
+ 
+ 	/* Type IDs of the stored columns */
+ 	Oid		oi_typids[FLEXIBLE_ARRAY_MEMBER];
+ } MinmaxOpcInfo;
+ 
+ /* the size of a MinmaxOpcInfo for the given number of columns */
+ #define SizeofMinmaxOpcInfo(ncols) \
+ 	(offsetof(MinmaxOpcInfo, oi_typids) + sizeof(Oid) * ncols)
+ 
+ typedef struct MinmaxDesc
+ {
+ 	/* the index relation itself */
+ 	Relation	md_index;
+ 
+ 	/* tuple descriptor of the index relation */
+ 	TupleDesc	md_tupdesc;
+ 
+ 	/* cached copy for on-disk tuples; generated at first use */
+ 	TupleDesc	md_disktdesc;
+ 
+ 	/* total number of Datum entries that are stored on-disk for all columns */
+ 	int			md_totalstored;
+ 
+ 	/* per-column info */
+ 	MinmaxOpcInfo *md_info[FLEXIBLE_ARRAY_MEMBER];	/* md_tupdesc->natts entries long */
+ } MinmaxDesc;
+ 
+ /*
+  * Globally-known function support numbers for Minmax indexes.  Individual
+  * opclasses define their own function support numbers, which must not collide
+  * with the definitions here.
+  */
+ #define MINMAX_PROCNUM_OPCINFO		1
+ #define MINMAX_PROCNUM_ADDVALUE		2
+ #define MINMAX_PROCNUM_CONSISTENT	3
+ 
+ #define MINMAX_DEBUG
+ 
+ /* we allow debug if using GCC; otherwise don't bother */
+ #if defined(MINMAX_DEBUG) && defined(__GNUC__)
+ #define MINMAX_elog(level, ...)		elog(level, __VA_ARGS__)
+ #else
+ /*
+  * The no-op fallback must accept the same argument list as the real macro;
+  * the previous one-argument form would not even compile at call sites that
+  * pass a format string, and "void(0)" is C++ cast syntax, not valid C.
+  */
+ #define MINMAX_elog(level, ...)	((void) 0)
+ #endif
+ 
+ /* minmax.c */
+ extern MinmaxDesc *minmax_build_mmdesc(Relation rel);
+ extern void minmax_free_mmdesc(MinmaxDesc *mmdesc);
+ extern void mm_page_init(Page page, uint16 type);
+ extern void mm_metapage_init(Page page, BlockNumber pagesPerRange,
+ 				 uint16 version);
+ 
+ #endif   /* MINMAX_INTERNAL_H */
*** /dev/null
--- b/src/include/access/minmax_page.h
***************
*** 0 ****
--- 1,70 ----
+ /*
+  * Prototypes and definitions for minmax page layouts
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_page.h
+  *
+  * NOTES
+  *
+  * These structs should really be private to specific minmax files, but it's
+  * useful to have them here so that they can be used by pageinspect and similar
+  * tools.
+  */
+ #ifndef MINMAX_PAGE_H
+ #define MINMAX_PAGE_H
+ 
+ #include "storage/block.h"
+ #include "storage/itemptr.h"
+ 
+ /* special space on all minmax pages stores a "type" identifier */
+ #define		MINMAX_PAGETYPE_META			0xF091
+ #define		MINMAX_PAGETYPE_REVMAP			0xF092
+ #define		MINMAX_PAGETYPE_REGULAR			0xF093
+ 
+ #define MINMAX_PAGE_TYPE(page) 	\
+ 	(((MinmaxSpecialSpace *) PageGetSpecialPointer(page))->type)
+ #define MINMAX_IS_REVMAP_PAGE(page) (MINMAX_PAGE_TYPE(page) == MINMAX_PAGETYPE_REVMAP)
+ #define MINMAX_IS_REGULAR_PAGE(page) (MINMAX_PAGE_TYPE(page) == MINMAX_PAGETYPE_REGULAR)
+ 
+ /* flags */
+ #define		MINMAX_EVACUATE_PAGE			1
+ 
+ typedef struct MinmaxSpecialSpace
+ {
+ 	uint16		flags;
+ 	uint16		type;
+ } MinmaxSpecialSpace;
+ 
+ /* Metapage definitions */
+ typedef struct MinmaxMetaPageData
+ {
+ 	uint32	minmaxMagic;
+ 	uint32	minmaxVersion;
+ 	BlockNumber	pagesPerRange;
+ 	BlockNumber lastRevmapPage;
+ } MinmaxMetaPageData;
+ 
+ #define MINMAX_CURRENT_VERSION		1
+ #define MINMAX_META_MAGIC			0xA8109CFA
+ 
+ #define MINMAX_METAPAGE_BLKNO		0
+ #define MINMAX_REVMAP_FIRST_BLKNO	1
+ 
+ /*
+  * Definitions for regular revmap pages: the page body is one big array of
+  * TIDs, one per heap page range, pointing at the index tuple for that range.
+  *
+  * NOTE(review): other trailing arrays in this patch use
+  * FLEXIBLE_ARRAY_MEMBER; rmr_tids uses [1], which makes
+  * sizeof(RevmapContents) differ from the flexible form -- confirm nothing
+  * depends on sizeof before changing it.
+  */
+ typedef struct RevmapContents
+ {
+ 	ItemPointerData rmr_tids[1];	/* really REVMAP_PAGE_MAXITEMS */
+ } RevmapContents;
+ 
+ /* usable bytes in the array: block minus header, struct offset, special */
+ #define REVMAP_CONTENT_SIZE	\
+ 	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
+ 	 offsetof(RevmapContents, rmr_tids) - \
+ 	 MAXALIGN(sizeof(MinmaxSpecialSpace)))
+ /* max num of items in the array */
+ #define REVMAP_PAGE_MAXITEMS \
+ 	(REVMAP_CONTENT_SIZE / sizeof(ItemPointerData))
+ 
+ #endif		/* MINMAX_PAGE_H */
*** /dev/null
--- b/src/include/access/minmax_pageops.h
***************
*** 0 ****
--- 1,29 ----
+ /*
+  * Prototypes for operating on minmax pages.
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/include/access/minmax_pageops.h
+  */
+ #ifndef MINMAX_PAGEOPS_H
+ #define MINMAX_PAGEOPS_H
+ 
+ #include "access/minmax_revmap.h"
+ 
+ extern bool mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 			Buffer oldbuf, OffsetNumber oldoff,
+ 			const MMTuple *origtup, Size origsz,
+ 			const MMTuple *newtup, Size newsz,
+ 			bool samepage, bool *extended);
+ extern void mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk,
+ 			MMTuple *tup, Size itemsz, bool *extended);
+ 
+ extern bool mm_start_evacuating_page(Relation idxRel, Buffer buf);
+ extern void mm_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
+ 				 mmRevmapAccess *rmAccess, Buffer buf);
+ 
+ #endif	/* MINMAX_PAGEOPS_H */
*** /dev/null
--- b/src/include/access/minmax_revmap.h
***************
*** 0 ****
--- 1,36 ----
+ /*
+  * prototypes for minmax reverse range maps
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *		src/include/access/minmax_revmap.h
+  */
+ 
+ #ifndef MINMAX_REVMAP_H
+ #define MINMAX_REVMAP_H
+ 
+ #include "access/minmax_tuple.h"
+ #include "storage/block.h"
+ #include "storage/buf.h"
+ #include "storage/itemptr.h"
+ #include "storage/off.h"
+ #include "utils/relcache.h"
+ 
+ /* struct definition lives in mmrevmap.c */
+ typedef struct mmRevmapAccess mmRevmapAccess;
+ 
+ extern mmRevmapAccess *mmRevmapAccessInit(Relation idxrel,
+ 				   BlockNumber *pagesPerRange);
+ extern void mmRevmapAccessTerminate(mmRevmapAccess *rmAccess);
+ 
+ extern Buffer mmLockRevmapPageForUpdate(mmRevmapAccess *rmAccess,
+ 						  BlockNumber heapBlk);
+ extern void mmSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange,
+ 					  BlockNumber heapBlk, ItemPointerData tid);
+ extern MMTuple *mmGetMMTupleForHeapBlock(mmRevmapAccess *rmAccess,
+ 						 BlockNumber heapBlk, Buffer *buf, OffsetNumber *off,
+ 						 int mode);
+ 
+ #endif   /* MINMAX_REVMAP_H */
*** /dev/null
--- b/src/include/access/minmax_tuple.h
***************
*** 0 ****
--- 1,90 ----
+ /*
+  * Declarations for dealing with MinMax-specific tuples.
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/include/access/minmax_tuple.h
+  */
+ #ifndef MINMAX_TUPLE_H
+ #define MINMAX_TUPLE_H
+ 
+ #include "access/minmax_internal.h"
+ #include "access/tupdesc.h"
+ 
+ 
+ /*
+  * A minmax index stores one index tuple per page range.  Each index tuple
+  * has one MMValues struct for each indexed column; in turn, each MMValues
+  * has (besides the null flags) an array of Datum whose size is determined by
+  * the opclass.
+  */
+ typedef struct MMValues
+ {
+ 	bool		hasnulls;		/* is there any nulls in the page range? */
+ 	bool		allnulls;		/* are all values nulls in the page range? */
+ 	Datum	   *values;			/* current accumulated values */
+ } MMValues;
+ 
+ /*
+  * This struct represents one index tuple, comprising the minimum and maximum
+  * values for all indexed columns, within one page range.  These values can
+  * only be meaningfully decoded with an appropriate MinmaxDesc.
+  */
+ typedef struct DeformedMMTuple
+ {
+ 	BlockNumber	dt_blkno;	/* heap blkno that the tuple is for */
+ 	MMValues	dt_columns[FLEXIBLE_ARRAY_MEMBER];
+ } DeformedMMTuple;
+ 
+ /*
+  * An on-disk minmax tuple.  This is possibly followed by a nulls bitmask, with
+  * room for 2 null bits (two bits for each indexed column); an opclass-defined
+  * number of Datum values for each column follow.
+  */
+ typedef struct MMTuple
+ {
+ 	/* heap block number that the tuple is for */
+ 	BlockNumber mt_blkno;
+ 
+ 	/* ---------------
+ 	 * mt_info is laid out in the following fashion:
+ 	 *
+ 	 * 7th (high) bit: has nulls
+ 	 * 6th bit: unused
+ 	 * 5th bit: unused
+ 	 * 4-0 bit: offset of data
+ 	 * ---------------
+ 	 */
+ 	uint8		mt_info;
+ } MMTuple;
+ 
+ #define SizeOfMinMaxTuple	(offsetof(MMTuple, mt_info) + sizeof(uint8))
+ 
+ /*
+  * t_info manipulation macros
+  */
+ #define MMIDX_OFFSET_MASK 0x1F
+ /* bit 0x20 is not used at present */
+ /* bit 0x40 is not used at present */
+ #define MMIDX_NULLS_MASK 0x80
+ 
+ #define MMTupleDataOffset(mmtup)	((Size) (((MMTuple *) (mmtup))->mt_info & MMIDX_OFFSET_MASK))
+ #define MMTupleHasNulls(mmtup)	(((((MMTuple *) (mmtup))->mt_info & MMIDX_NULLS_MASK)) != 0)
+ 
+ 
+ extern MMTuple *minmax_form_tuple(MinmaxDesc *mmdesc, BlockNumber blkno,
+ 				  DeformedMMTuple *tuple, Size *size);
+ extern void minmax_free_tuple(MMTuple *tuple);
+ extern MMTuple *minmax_copy_tuple(MMTuple *tuple, Size len);
+ extern bool minmax_tuples_equal(const MMTuple *a, Size alen,
+ 					const MMTuple *b, Size blen);
+ 
+ extern DeformedMMTuple *minmax_new_dtuple(MinmaxDesc *mmdesc);
+ extern void minmax_dtuple_initialize(DeformedMMTuple *dtuple,
+ 						 MinmaxDesc *mmdesc);
+ extern DeformedMMTuple *minmax_deform_tuple(MinmaxDesc *mmdesc,
+ 					MMTuple *tuple);
+ 
+ #endif   /* MINMAX_TUPLE_H */
*** /dev/null
--- b/src/include/access/minmax_xlog.h
***************
*** 0 ****
--- 1,106 ----
+ /*-------------------------------------------------------------------------
+  *
+  * minmax_xlog.h
+  *	  POSTGRES MinMax access XLOG definitions.
+  *
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/access/minmax_xlog.h
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef MINMAX_XLOG_H
+ #define MINMAX_XLOG_H
+ 
+ #include "access/xlog.h"
+ #include "storage/bufpage.h"
+ #include "storage/itemptr.h"
+ #include "storage/relfilenode.h"
+ #include "utils/relcache.h"
+ 
+ 
+ /*
+  * WAL record definitions for minmax's WAL operations
+  *
+  * XLOG allows to store some information in high 4 bits of log
+  * record xl_info field.
+  */
+ #define XLOG_MINMAX_CREATE_INDEX	0x00
+ #define XLOG_MINMAX_INSERT			0x10
+ #define XLOG_MINMAX_UPDATE			0x20
+ #define XLOG_MINMAX_SAMEPAGE_UPDATE	0x30
+ #define XLOG_MINMAX_REVMAP_EXTEND	0x40
+ #define XLOG_MINMAX_REVMAP_VACUUM	0x50
+ 
+ #define XLOG_MINMAX_OPMASK			0x70
+ /*
+  * When we insert the first item on a new page, we restore the entire page in
+  * redo.
+  */
+ #define XLOG_MINMAX_INIT_PAGE		0x80
+ 
+ /* This is what we need to know about a minmax index create */
+ typedef struct xl_minmax_createidx
+ {
+ 	BlockNumber pagesPerRange;
+ 	RelFileNode	node;
+ 	uint16		version;
+ } xl_minmax_createidx;
+ #define SizeOfMinmaxCreateIdx	(offsetof(xl_minmax_createidx, version) + sizeof(uint16))
+ 
+ /*
+  * This is what we need to know about a minmax tuple insert
+  */
+ typedef struct xl_minmax_insert
+ {
+ 	RelFileNode		node;
+ 	BlockNumber		heapBlk;
+ 
+ 	/* extra information needed to update the revmap */
+ 	BlockNumber		revmapBlk;
+ 	BlockNumber		pagesPerRange;
+ 
+ 	ItemPointerData	tid;
+ 	/* tuple data follows at end of struct */
+ } xl_minmax_insert;
+ 
+ #define SizeOfMinmaxInsert	(offsetof(xl_minmax_insert, tid) + sizeof(ItemPointerData))
+ 
+ /*
+  * A cross-page update is the same as an insert, but also stores the old tid.
+  */
+ typedef struct xl_minmax_update
+ {
+ 	xl_minmax_insert new;
+ 	ItemPointerData	oldtid;
+ } xl_minmax_update;
+ 
+ #define SizeOfMinmaxUpdate	(offsetof(xl_minmax_update, oldtid) + sizeof(ItemPointerData))
+ 
+ /* This is what we need to know about a minmax tuple samepage update */
+ typedef struct xl_minmax_samepage_update
+ {
+ 	RelFileNode		node;
+ 	ItemPointerData	tid;
+ 	/* tuple data follows at end of struct */
+ } xl_minmax_samepage_update;
+ 
+ #define SizeOfMinmaxSamepageUpdate		(offsetof(xl_minmax_samepage_update, tid) + sizeof(ItemPointerData))
+ 
+ /* This is what we need to know about a revmap extension */
+ typedef struct xl_minmax_revmap_extend
+ {
+ 	RelFileNode		node;
+ 	BlockNumber		targetBlk;
+ } xl_minmax_revmap_extend;
+ 
+ #define SizeOfMinmaxRevmapExtend	(offsetof(xl_minmax_revmap_extend, targetBlk) + \
+ 								 sizeof(BlockNumber))
+ 
+ 
+ extern void minmax_desc(StringInfo buf, XLogRecord *record);
+ extern void minmax_redo(XLogRecPtr lsn, XLogRecord *record);
+ 
+ #endif	/* MINMAX_XLOG_H */
*** a/src/include/access/reloptions.h
--- b/src/include/access/reloptions.h
***************
*** 45,52 **** typedef enum relopt_kind
  	RELOPT_KIND_TABLESPACE = (1 << 7),
  	RELOPT_KIND_SPGIST = (1 << 8),
  	RELOPT_KIND_VIEW = (1 << 9),
  	/* if you add a new kind, make sure you update "last_default" too */
! 	RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_VIEW,
  	/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
  	RELOPT_KIND_MAX = (1 << 30)
  } relopt_kind;
--- 45,53 ----
  	RELOPT_KIND_TABLESPACE = (1 << 7),
  	RELOPT_KIND_SPGIST = (1 << 8),
  	RELOPT_KIND_VIEW = (1 << 9),
+ 	RELOPT_KIND_MINMAX = (1 << 10),
  	/* if you add a new kind, make sure you update "last_default" too */
! 	RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_MINMAX,
  	/* some compilers treat enums as signed ints, so we can't use 1 << 31 */
  	RELOPT_KIND_MAX = (1 << 30)
  } relopt_kind;
*** a/src/include/access/relscan.h
--- b/src/include/access/relscan.h
***************
*** 35,42 **** typedef struct HeapScanDescData
  	bool		rs_temp_snap;	/* unregister snapshot at scan end? */
  
  	/* state set up at initscan time */
! 	BlockNumber rs_nblocks;		/* number of blocks to scan */
  	BlockNumber rs_startblock;	/* block # to start at */
  	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
  	bool		rs_syncscan;	/* report location to syncscan logic? */
  
--- 35,44 ----
  	bool		rs_temp_snap;	/* unregister snapshot at scan end? */
  
  	/* state set up at initscan time */
! 	BlockNumber rs_nblocks;		/* total number of blocks in rel */
  	BlockNumber rs_startblock;	/* block # to start at */
+ 	BlockNumber	rs_initblock;	/* block # to consider initial of rel */
+ 	BlockNumber	rs_numblocks;	/* number of blocks to scan */
  	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
  	bool		rs_syncscan;	/* report location to syncscan logic? */
  
*** a/src/include/access/rmgrlist.h
--- b/src/include/access/rmgrlist.h
***************
*** 42,44 **** PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_xlog_startup, gin_xlog_cleanup
--- 42,45 ----
  PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_xlog_startup, gist_xlog_cleanup)
  PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, NULL, NULL)
  PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_xlog_startup, spg_xlog_cleanup)
+ PG_RMGR(RM_MINMAX_ID, "MinMax", minmax_redo, minmax_desc, NULL, NULL)
*** a/src/include/catalog/index.h
--- b/src/include/catalog/index.h
***************
*** 97,102 **** extern double IndexBuildHeapScan(Relation heapRelation,
--- 97,110 ----
  				   bool allow_sync,
  				   IndexBuildCallback callback,
  				   void *callback_state);
+ extern double IndexBuildHeapRangeScan(Relation heapRelation,
+ 						Relation indexRelation,
+ 						IndexInfo *indexInfo,
+ 						bool allow_sync,
+ 						BlockNumber start_blockno,
+ 						BlockNumber end_blockno,
+ 						IndexBuildCallback callback,
+ 						void *callback_state);
  
  extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
  
*** a/src/include/catalog/pg_am.h
--- b/src/include/catalog/pg_am.h
***************
*** 132,136 **** DESCR("GIN index access method");
--- 132,139 ----
  DATA(insert OID = 4000 (  spgist	0 5 f f f f f t f t f f f 0 spginsert spgbeginscan spggettuple spggetbitmap spgrescan spgendscan spgmarkpos spgrestrpos spgbuild spgbuildempty spgbulkdelete spgvacuumcleanup spgcanreturn spgcostestimate spgoptions ));
  DESCR("SP-GiST index access method");
  #define SPGIST_AM_OID 4000
+ DATA(insert OID = 3580 (  minmax	5 7 f f f f t t f t t f f 0 mminsert mmbeginscan - mmgetbitmap mmrescan mmendscan mmmarkpos mmrestrpos mmbuild mmbuildempty mmbulkdelete mmvacuumcleanup - mmcostestimate mmoptions ));
+ DESCR("minmax index access method");
+ #define MINMAX_AM_OID 3580
  
  #endif   /* PG_AM_H */
*** a/src/include/catalog/pg_amop.h
--- b/src/include/catalog/pg_amop.h
***************
*** 845,848 **** DATA(insert (	3550	869 869 25 s	932 783 0 ));
--- 845,929 ----
  DATA(insert (	3550	869 869 26 s	933 783 0 ));
  DATA(insert (	3550	869 869 27 s	934 783 0 ));
  
+ /*
+  * int4_minmax_ops
+  */
+ DATA(insert (	4054     23   23 1 s	  97	3580 0 ));
+ DATA(insert (	4054     23   23 2 s	 523	3580 0 ));
+ DATA(insert (	4054     23   23 3 s	  96	3580 0 ));
+ DATA(insert (	4054     23   23 4 s	 525	3580 0 ));
+ DATA(insert (	4054     23   23 5 s	 521	3580 0 ));
+ 
+ /*
+  * numeric_minmax_ops
+  */
+ DATA(insert (	4055   1700 1700 1 s	1754	3580 0 ));
+ DATA(insert (	4055   1700 1700 2 s	1755	3580 0 ));
+ DATA(insert (	4055   1700 1700 3 s	1752	3580 0 ));
+ DATA(insert (	4055   1700 1700 4 s	1757	3580 0 ));
+ DATA(insert (	4055   1700 1700 5 s	1756	3580 0 ));
+ 
+ /*
+  * text_minmax_ops
+  */
+ DATA(insert (	4056     25   25 1 s	 664	3580 0 ));
+ DATA(insert (	4056     25   25 2 s	 665	3580 0 ));
+ DATA(insert (	4056     25   25 3 s	  98	3580 0 ));
+ DATA(insert (	4056     25   25 4 s	 667	3580 0 ));
+ DATA(insert (	4056     25   25 5 s	 666	3580 0 ));
+ 
+ /*
+  * time_minmax_ops
+  */
+ DATA(insert (	4057   1083 1083 1 s	1110	3580 0 ));
+ DATA(insert (	4057   1083 1083 2 s	1111	3580 0 ));
+ DATA(insert (	4057   1083 1083 3 s	1108	3580 0 ));
+ DATA(insert (	4057   1083 1083 4 s	1113	3580 0 ));
+ DATA(insert (	4057   1083 1083 5 s	1112	3580 0 ));
+ 
+ /*
+  * timetz_minmax_ops
+  */
+ DATA(insert (	4058   1266 1266 1 s	1552	3580 0 ));
+ DATA(insert (	4058   1266 1266 2 s	1553	3580 0 ));
+ DATA(insert (	4058   1266 1266 3 s	1550	3580 0 ));
+ DATA(insert (	4058   1266 1266 4 s	1555	3580 0 ));
+ DATA(insert (	4058   1266 1266 5 s	1554	3580 0 ));
+ 
+ /*
+  * timestamp_minmax_ops
+  */
+ DATA(insert (	4059   1114 1114 1 s	2062	3580 0 ));
+ DATA(insert (	4059   1114 1114 2 s	2063	3580 0 ));
+ DATA(insert (	4059   1114 1114 3 s	2060	3580 0 ));
+ DATA(insert (	4059   1114 1114 4 s	2065	3580 0 ));
+ DATA(insert (	4059   1114 1114 5 s	2064	3580 0 ));
+ 
+ /*
+  * timestamptz_minmax_ops
+  */
+ DATA(insert (	4060   1184 1184 1 s	1322	3580 0 ));
+ DATA(insert (	4060   1184 1184 2 s	1323	3580 0 ));
+ DATA(insert (	4060   1184 1184 3 s	1320	3580 0 ));
+ DATA(insert (	4060   1184 1184 4 s	1325	3580 0 ));
+ DATA(insert (	4060   1184 1184 5 s	1324	3580 0 ));
+ 
+ /*
+  * date_minmax_ops
+  */
+ DATA(insert (	4061   1082 1082 1 s	1095	3580 0 ));
+ DATA(insert (	4061   1082 1082 2 s	1096	3580 0 ));
+ DATA(insert (	4061   1082 1082 3 s	1093	3580 0 ));
+ DATA(insert (	4061   1082 1082 4 s	1098	3580 0 ));
+ DATA(insert (	4061   1082 1082 5 s	1097	3580 0 ));
+ 
+ /*
+  * char_minmax_ops
+  */
+ DATA(insert (	4062     18   18 1 s	 631	3580 0 ));
+ DATA(insert (	4062     18   18 2 s	 632	3580 0 ));
+ DATA(insert (	4062     18   18 3 s	  92	3580 0 ));
+ DATA(insert (	4062     18   18 4 s	 634	3580 0 ));
+ DATA(insert (	4062     18   18 5 s	 633	3580 0 ));
+ 
  #endif   /* PG_AMOP_H */
*** a/src/include/catalog/pg_amproc.h
--- b/src/include/catalog/pg_amproc.h
***************
*** 432,435 **** DATA(insert (	4017   25 25 3 4029 ));
--- 432,508 ----
  DATA(insert (	4017   25 25 4 4030 ));
  DATA(insert (	4017   25 25 5 4031 ));
  
+ /* minmax */
+ DATA(insert (   4054   23 23 1 3383 ));
+ DATA(insert (   4054   23 23 2 3384 ));
+ DATA(insert (   4054   23 23 3 3385 ));
+ DATA(insert (   4054   23 23 4   66 ));
+ DATA(insert (   4054   23 23 5  149 ));
+ DATA(insert (   4054   23 23 6  150 ));
+ DATA(insert (   4054   23 23 7  147 ));
+ 
+ DATA(insert (   4055   1700 1700 1 3386 ));
+ DATA(insert (   4055   1700 1700 2 3384 ));
+ DATA(insert (   4055   1700 1700 3 3385 ));
+ DATA(insert (   4055   1700 1700 4 1722 ));
+ DATA(insert (   4055   1700 1700 5 1723 ));
+ DATA(insert (   4055   1700 1700 6 1721 ));
+ DATA(insert (   4055   1700 1700 7 1720 ));
+ 
+ DATA(insert (   4056   25 25 1 3387 ));
+ DATA(insert (   4056   25 25 2 3384 ));
+ DATA(insert (   4056   25 25 3 3385 ));
+ DATA(insert (   4056   25 25 4  740 ));
+ DATA(insert (   4056   25 25 5  741 ));
+ DATA(insert (   4056   25 25 6  743 ));
+ DATA(insert (   4056   25 25 7  742 ));
+ 
+ DATA(insert (   4057   1083 1083 1 3388 ));
+ DATA(insert (   4057   1083 1083 2 3384 ));
+ DATA(insert (   4057   1083 1083 3 3385 ));
+ DATA(insert (   4057   1083 1083 4 1102 ));
+ DATA(insert (   4057   1083 1083 5 1103 ));
+ DATA(insert (   4057   1083 1083 6 1105 ));
+ DATA(insert (   4057   1083 1083 7 1104 ));
+ 
+ DATA(insert (   4058   1266 1266 1 3389 ));
+ DATA(insert (   4058   1266 1266 2 3384 ));
+ DATA(insert (   4058   1266 1266 3 3385 ));
+ DATA(insert (   4058   1266 1266 4 1354 ));
+ DATA(insert (   4058   1266 1266 5 1355 ));
+ DATA(insert (   4058   1266 1266 6 1356 ));
+ DATA(insert (   4058   1266 1266 7 1357 ));
+ 
+ DATA(insert (   4059   1114 1114 1 3390 ));
+ DATA(insert (   4059   1114 1114 2 3384 ));
+ DATA(insert (   4059   1114 1114 3 3385 ));
+ DATA(insert (   4059   1114 1114 4 2054 ));
+ DATA(insert (   4059   1114 1114 5 2055 ));
+ DATA(insert (   4059   1114 1114 6 2056 ));
+ DATA(insert (   4059   1114 1114 7 2057 ));
+ 
+ DATA(insert (   4060   1184 1184 1 3391 ));
+ DATA(insert (   4060   1184 1184 2 3384 ));
+ DATA(insert (   4060   1184 1184 3 3385 ));
+ DATA(insert (   4060   1184 1184 4 1154 ));
+ DATA(insert (   4060   1184 1184 5 1155 ));
+ DATA(insert (   4060   1184 1184 6 1156 ));
+ DATA(insert (   4060   1184 1184 7 1157 ));
+ 
+ DATA(insert (   4061   1082 1082 1 3392 ));
+ DATA(insert (   4061   1082 1082 2 3384 ));
+ DATA(insert (   4061   1082 1082 3 3385 ));
+ DATA(insert (   4061   1082 1082 4 1087 ));
+ DATA(insert (   4061   1082 1082 5 1088 ));
+ DATA(insert (   4061   1082 1082 6 1090 ));
+ DATA(insert (   4061   1082 1082 7 1089 ));
+ 
+ DATA(insert (   4062   18 18 1 3393 ));
+ DATA(insert (   4062   18 18 2 3384 ));
+ DATA(insert (   4062   18 18 3 3385 ));
+ DATA(insert (   4062   18 18 4 1246 ));
+ DATA(insert (   4062   18 18 5   72 ));
+ DATA(insert (   4062   18 18 6   74 ));
+ DATA(insert (   4062   18 18 7   73 ));
+ 
  #endif   /* PG_AMPROC_H */
*** a/src/include/catalog/pg_opclass.h
--- b/src/include/catalog/pg_opclass.h
***************
*** 235,239 **** DATA(insert (	403		jsonb_ops			PGNSP PGUID 4033  3802 t 0 ));
--- 235,248 ----
  DATA(insert (	405		jsonb_ops			PGNSP PGUID 4034  3802 t 0 ));
  DATA(insert (	2742	jsonb_ops			PGNSP PGUID 4036  3802 t 25 ));
  DATA(insert (	2742	jsonb_path_ops		PGNSP PGUID 4037  3802 f 23 ));
+ DATA(insert (	3580	int4_minmax_ops			PGNSP PGUID 4054    23 t 0 ));
+ DATA(insert (	3580	numeric_minmax_ops		PGNSP PGUID 4055  1700 t 0 ));
+ DATA(insert (	3580	text_minmax_ops			PGNSP PGUID 4056    25 t 0 ));
+ DATA(insert (	3580	time_minmax_ops			PGNSP PGUID 4057  1083 t 0 ));
+ DATA(insert (	3580	timetz_minmax_ops		PGNSP PGUID 4058  1266 t 0 ));
+ DATA(insert (	3580	timestamp_minmax_ops	PGNSP PGUID 4059  1114 t 0 ));
+ DATA(insert (	3580	timestamptz_minmax_ops	PGNSP PGUID 4060  1184 t 0 ));
+ DATA(insert (	3580	date_minmax_ops			PGNSP PGUID 4061  1082 t 0 ));
+ DATA(insert (	3580	char_minmax_ops			PGNSP PGUID 4062    18 t 0 ));
  
  #endif   /* PG_OPCLASS_H */
*** a/src/include/catalog/pg_opfamily.h
--- b/src/include/catalog/pg_opfamily.h
***************
*** 157,160 **** DATA(insert OID = 4035 (	783		jsonb_ops		PGNSP PGUID ));
--- 157,170 ----
  DATA(insert OID = 4036 (	2742	jsonb_ops		PGNSP PGUID ));
  DATA(insert OID = 4037 (	2742	jsonb_path_ops	PGNSP PGUID ));
  
+ DATA(insert OID = 4054 (	3580	int4_minmax_ops			PGNSP PGUID ));
+ DATA(insert OID = 4055 (	3580	numeric_minmax_ops		PGNSP PGUID ));
+ DATA(insert OID = 4056 (	3580	text_minmax_ops			PGNSP PGUID ));
+ DATA(insert OID = 4057 (	3580	time_minmax_ops			PGNSP PGUID ));
+ DATA(insert OID = 4058 (	3580	timetz_minmax_ops		PGNSP PGUID ));
+ DATA(insert OID = 4059 (	3580	timestamp_minmax_ops	PGNSP PGUID ));
+ DATA(insert OID = 4060 (	3580	timestamptz_minmax_ops	PGNSP PGUID ));
+ DATA(insert OID = 4061 (	3580	date_minmax_ops			PGNSP PGUID ));
+ DATA(insert OID = 4062 (	3580	char_minmax_ops			PGNSP PGUID ));
+ 
  #endif   /* PG_OPFAMILY_H */
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 565,570 **** DESCR("btree(internal)");
--- 565,598 ----
  DATA(insert OID = 2785 (  btoptions		   PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  btoptions _null_ _null_ _null_ ));
  DESCR("btree(internal)");
  
+ DATA(insert OID = 3789 (  mmgetbitmap	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_	mmgetbitmap _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3790 (  mminsert		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_	mminsert _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3791 (  mmbeginscan	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_	mmbeginscan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3792 (  mmrescan		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 5 0 2278 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmrescan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3793 (  mmendscan		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmendscan _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3794 (  mmmarkpos		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmmarkpos _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3795 (  mmrestrpos		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmrestrpos _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3796 (  mmbuild		   PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ mmbuild _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3797 (  mmbuildempty	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ mmbuildempty _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3798 (  mmbulkdelete	   PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 2281 "2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmbulkdelete _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3799 (  mmvacuumcleanup   PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmvacuumcleanup _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3800 (  mmcostestimate   PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmcostestimate _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ DATA(insert OID = 3801 (  mmoptions		   PGNSP PGUID 12 1 0 0 0 f f f f t f s 2 0 17 "1009 16" _null_ _null_ _null_ _null_  mmoptions _null_ _null_ _null_ ));
+ DESCR("minmax(internal)");
+ 
+ 
  DATA(insert OID = 339 (  poly_same		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_same _null_ _null_ _null_ ));
  DATA(insert OID = 340 (  poly_contain	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_contain _null_ _null_ _null_ ));
  DATA(insert OID = 341 (  poly_left		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "604 604" _null_ _null_ _null_ _null_ poly_left _null_ _null_ _null_ ));
***************
*** 4066,4071 **** DATA(insert OID = 2747 (  arrayoverlap		   PGNSP PGUID 12 1 0 0 0 f f f f t f i
--- 4094,4123 ----
  DATA(insert OID = 2748 (  arraycontains		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "2277 2277" _null_ _null_ _null_ _null_ arraycontains _null_ _null_ _null_ ));
  DATA(insert OID = 2749 (  arraycontained	   PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "2277 2277" _null_ _null_ _null_ _null_ arraycontained _null_ _null_ _null_ ));
  
+ /* Minmax */
+ DATA(insert OID = 3384 ( minmax_sortable_add_value PGNSP PGUID 12 1 0 0 0 f f f f t f i 5 0 16 "2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ mmSortableAddValue _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3385 ( minmax_sortable_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 16 "2281 2281 2281" _null_ _null_ _null_ _null_ mmSortableConsistent _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3383 ( minmax_sortable_opcinfo_int4 PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_int4 _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3386 ( minmax_sortable_opcinfo_numeric PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_numeric _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3387 ( minmax_sortable_opcinfo_text PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_text _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3388 ( minmax_sortable_opcinfo_time PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_time _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3389 ( minmax_sortable_opcinfo_timetz PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_timetz _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3390 ( minmax_sortable_opcinfo_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_timestamp _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3391 ( minmax_sortable_opcinfo_timestamptz PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_timestamptz _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3392 ( minmax_sortable_opcinfo_date PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_date _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ DATA(insert OID = 3393 ( minmax_sortable_opcinfo_char PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ mmSortableOpcInfo_char _null_ _null_ _null_ ));
+ DESCR("MinMax sortable datatype support");
+ 
  /* userlock replacements */
  DATA(insert OID = 2880 (  pg_advisory_lock				PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "20" _null_ _null_ _null_ _null_ pg_advisory_lock_int8 _null_ _null_ _null_ ));
  DESCR("obtain exclusive advisory lock");
*** a/src/include/storage/bufpage.h
--- b/src/include/storage/bufpage.h
***************
*** 403,408 **** extern Size PageGetExactFreeSpace(Page page);
--- 403,410 ----
  extern Size PageGetHeapFreeSpace(Page page);
  extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
  extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
+ extern void PageIndexDeleteNoCompact(Page page, OffsetNumber *itemnos,
+ 						 int nitems);
  extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
  extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
  
*** a/src/include/utils/selfuncs.h
--- b/src/include/utils/selfuncs.h
***************
*** 195,200 **** extern Datum hashcostestimate(PG_FUNCTION_ARGS);
--- 195,201 ----
  extern Datum gistcostestimate(PG_FUNCTION_ARGS);
  extern Datum spgcostestimate(PG_FUNCTION_ARGS);
  extern Datum gincostestimate(PG_FUNCTION_ARGS);
+ extern Datum mmcostestimate(PG_FUNCTION_ARGS);
  
  /* Functions in array_selfuncs.c */
  
*** a/src/test/regress/expected/opr_sanity.out
--- b/src/test/regress/expected/opr_sanity.out
***************
*** 1658,1663 **** ORDER BY 1, 2, 3;
--- 1658,1668 ----
         2742 |            9 | ?
         2742 |           10 | ?|
         2742 |           11 | ?&
+        3580 |            1 | <
+        3580 |            2 | <=
+        3580 |            3 | =
+        3580 |            4 | >=
+        3580 |            5 | >
         4000 |            1 | <<
         4000 |            1 | ~<~
         4000 |            2 | &<
***************
*** 1680,1686 **** ORDER BY 1, 2, 3;
         4000 |           15 | >
         4000 |           16 | @>
         4000 |           18 | =
! (80 rows)
  
  -- Check that all opclass search operators have selectivity estimators.
  -- This is not absolutely required, but it seems a reasonable thing
--- 1685,1691 ----
         4000 |           15 | >
         4000 |           16 | @>
         4000 |           18 | =
! (85 rows)
  
  -- Check that all opclass search operators have selectivity estimators.
  -- This is not absolutely required, but it seems a reasonable thing
***************
*** 1842,1852 **** WHERE NOT (
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
   amname | opfname | amproclefttype | amprocrighttype | procnums 
  --------+---------+----------------+-----------------+----------
--- 1847,1859 ----
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
+   -- MinMax has seven support functions, all mandatory
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{1, 2, 3, 4, 5, 6, 7}'
  );
   amname | opfname | amproclefttype | amprocrighttype | procnums 
  --------+---------+----------------+-----------------+----------
***************
*** 1867,1873 **** WHERE NOT (
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
   amname | opcname | procnums 
  --------+---------+----------
--- 1874,1881 ----
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{1, 2, 3, 4, 5, 6, 7}'
  );
   amname | opcname | procnums 
  --------+---------+----------
*** a/src/test/regress/sql/opr_sanity.sql
--- b/src/test/regress/sql/opr_sanity.sql
***************
*** 1195,1205 **** WHERE NOT (
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
  
  -- Also, check if there are any pg_opclass entries that don't seem to have
--- 1195,1207 ----
    -- GIN has six support functions. 1-3 are mandatory, 5 is optional, and
    --   at least one of 4 and 6 must be given.
    -- SP-GiST has five support functions, all mandatory
+   -- MinMax has seven support functions, all mandatory
    amname = 'btree' AND procnums @> '{1}' OR
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{1, 2, 3, 4, 5, 6, 7}'
  );
  
  -- Also, check if there are any pg_opclass entries that don't seem to have
***************
*** 1218,1224 **** WHERE NOT (
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}'
  );
  
  -- Unfortunately, we can't check the amproc link very well because the
--- 1220,1227 ----
    amname = 'hash' AND procnums = '{1}' OR
    amname = 'gist' AND procnums @> '{1, 2, 3, 4, 5, 6, 7}' OR
    amname = 'gin' AND (procnums @> '{1, 2, 3}' AND (procnums && '{4, 6}')) OR
!   amname = 'spgist' AND procnums = '{1, 2, 3, 4, 5}' OR
!   amname = 'minmax' AND procnums = '{1, 2, 3, 4, 5, 6, 7}'
  );
  
  -- Unfortunately, we can't check the amproc link very well because the
