*** src/backend/access/transam/xact.c.orig	Tue Apr  3 12:34:35 2007
--- src/backend/access/transam/xact.c	Wed Apr 25 20:32:00 2007
***************
*** 1631,1636 ****
--- 1631,1637 ----
  	/* smgrcommit already done */
  	AtEOXact_Files();
  	AtEOXact_ComboCid();
+ 	AtEOXact_HashTables(true);
  	pgstat_clear_snapshot();
  	pgstat_count_xact_commit();
  	pgstat_report_txn_timestamp(0);
***************
*** 1849,1854 ****
--- 1850,1856 ----
  	/* smgrcommit already done */
  	AtEOXact_Files();
  	AtEOXact_ComboCid();
+ 	AtEOXact_HashTables(true);
  	pgstat_clear_snapshot();
  
  	CurrentResourceOwner = NULL;
***************
*** 2003,2008 ****
--- 2005,2011 ----
  	smgrabort();
  	AtEOXact_Files();
  	AtEOXact_ComboCid();
+ 	AtEOXact_HashTables(false);
  	pgstat_clear_snapshot();
  	pgstat_count_xact_rollback();
  	pgstat_report_txn_timestamp(0);
***************
*** 3716,3721 ****
--- 3719,3725 ----
  						  s->parent->subTransactionId);
  	AtEOSubXact_Files(true, s->subTransactionId,
  					  s->parent->subTransactionId);
+ 	AtEOSubXact_HashTables(true, s->nestingLevel);
  
  	/*
  	 * We need to restore the upper transaction's read-only state, in case the
***************
*** 3827,3832 ****
--- 3831,3837 ----
  							  s->parent->subTransactionId);
  		AtEOSubXact_Files(false, s->subTransactionId,
  						  s->parent->subTransactionId);
+ 		AtEOSubXact_HashTables(false, s->nestingLevel);
  	}
  
  	/*
*** src/backend/commands/prepare.c.orig	Mon Apr 16 14:21:07 2007
--- src/backend/commands/prepare.c	Thu Apr 26 15:00:40 2007
***************
*** 21,27 ****
  #include "catalog/pg_type.h"
  #include "commands/explain.h"
  #include "commands/prepare.h"
! #include "funcapi.h"
  #include "parser/analyze.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
--- 21,27 ----
  #include "catalog/pg_type.h"
  #include "commands/explain.h"
  #include "commands/prepare.h"
! #include "miscadmin.h"
  #include "parser/analyze.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
***************
*** 743,834 ****
  Datum
  pg_prepared_statement(PG_FUNCTION_ARGS)
  {
! 	FuncCallContext *funcctx;
! 	HASH_SEQ_STATUS *hash_seq;
! 	PreparedStatement *prep_stmt;
  
! 	/* stuff done only on the first call of the function */
! 	if (SRF_IS_FIRSTCALL())
! 	{
! 		TupleDesc	tupdesc;
! 		MemoryContext oldcontext;
  
! 		/* create a function context for cross-call persistence */
! 		funcctx = SRF_FIRSTCALL_INIT();
  
! 		/*
! 		 * switch to memory context appropriate for multiple function calls
! 		 */
! 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
  
! 		/* allocate memory for user context */
! 		if (prepared_queries)
  		{
! 			hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
! 			hash_seq_init(hash_seq, prepared_queries);
! 			funcctx->user_fctx = (void *) hash_seq;
! 		}
! 		else
! 			funcctx->user_fctx = NULL;
  
! 		/*
! 		 * build tupdesc for result tuples. This must match the definition of
! 		 * the pg_prepared_statements view in system_views.sql
! 		 */
! 		tupdesc = CreateTemplateTupleDesc(5, false);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
! 						   TEXTOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
! 						   TEXTOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
! 						   TIMESTAMPTZOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
! 						   REGTYPEARRAYOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
! 						   BOOLOID, -1, 0);
! 
! 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
! 		MemoryContextSwitchTo(oldcontext);
! 	}
! 
! 	/* stuff done on every call of the function */
! 	funcctx = SRF_PERCALL_SETUP();
! 	hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
! 
! 	/* if the hash table is uninitialized, we're done */
! 	if (hash_seq == NULL)
! 		SRF_RETURN_DONE(funcctx);
! 
! 	prep_stmt = hash_seq_search(hash_seq);
! 	if (prep_stmt)
! 	{
! 		Datum		result;
! 		HeapTuple	tuple;
! 		Datum		values[5];
! 		bool		nulls[5];
  
! 		MemSet(nulls, 0, sizeof(nulls));
  
! 		values[0] = DirectFunctionCall1(textin,
  									  CStringGetDatum(prep_stmt->stmt_name));
  
! 		if (prep_stmt->plansource->query_string == NULL)
! 			nulls[1] = true;
! 		else
! 			values[1] = DirectFunctionCall1(textin,
  						CStringGetDatum(prep_stmt->plansource->query_string));
  
! 		values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
! 		values[3] = build_regtype_array(prep_stmt->plansource->param_types,
! 										prep_stmt->plansource->num_params);
! 		values[4] = BoolGetDatum(prep_stmt->from_sql);
! 
! 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
! 		result = HeapTupleGetDatum(tuple);
! 		SRF_RETURN_NEXT(funcctx, result);
  	}
  
! 	SRF_RETURN_DONE(funcctx);
  }
  
  /*
--- 743,841 ----
  Datum
  pg_prepared_statement(PG_FUNCTION_ARGS)
  {
! 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
! 	TupleDesc	tupdesc;
! 	Tuplestorestate *tupstore;
! 	MemoryContext per_query_ctx;
! 	MemoryContext oldcontext;
  
! 	/* check to see if caller supports us returning a tuplestore */
! 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
! 		ereport(ERROR,
! 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 				 errmsg("set-valued function called in context that cannot accept a set")));
! 	if (!(rsinfo->allowedModes & SFRM_Materialize))
! 		ereport(ERROR,
! 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 				 errmsg("materialize mode required, but it is not " \
! 						"allowed in this context")));
! 
! 	/* need to build tuplestore in query context */
! 	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
! 	oldcontext = MemoryContextSwitchTo(per_query_ctx);
  
! 	/*
! 	 * build tupdesc for result tuples. This must match the definition of
! 	 * the pg_prepared_statements view in system_views.sql
! 	 */
! 	tupdesc = CreateTemplateTupleDesc(5, false);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
! 					   TEXTOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
! 					   TEXTOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prepare_time",
! 					   TIMESTAMPTZOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "parameter_types",
! 					   REGTYPEARRAYOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "from_sql",
! 					   BOOLOID, -1, 0);
  
! 	/*
! 	 * We put all the tuples into a tuplestore in one scan of the hashtable.
! 	 * This avoids any issue of the hashtable possibly changing between calls.
! 	 */
! 	tupstore = tuplestore_begin_heap(true, false, work_mem);
! 
! 	/* hash table might be uninitialized */
! 	if (prepared_queries)
! 	{
! 		HASH_SEQ_STATUS hash_seq;
! 		PreparedStatement *prep_stmt;
  
! 		hash_seq_init(&hash_seq, prepared_queries);
! 		while ((prep_stmt = hash_seq_search(&hash_seq)) != NULL)
  		{
! 			HeapTuple	tuple;
! 			Datum		values[5];
! 			bool		nulls[5];
  
! 			/* generate junk in short-term context */
! 			MemoryContextSwitchTo(oldcontext);
  
! 			MemSet(nulls, 0, sizeof(nulls));
  
! 			values[0] = DirectFunctionCall1(textin,
  									  CStringGetDatum(prep_stmt->stmt_name));
  
! 			if (prep_stmt->plansource->query_string == NULL)
! 				nulls[1] = true;
! 			else
! 				values[1] = DirectFunctionCall1(textin,
  						CStringGetDatum(prep_stmt->plansource->query_string));
  
! 			values[2] = TimestampTzGetDatum(prep_stmt->prepare_time);
! 			values[3] = build_regtype_array(prep_stmt->plansource->param_types,
! 											prep_stmt->plansource->num_params);
! 			values[4] = BoolGetDatum(prep_stmt->from_sql);
! 
! 			tuple = heap_form_tuple(tupdesc, values, nulls);
! 
! 			/* switch to appropriate context while storing the tuple */
! 			MemoryContextSwitchTo(per_query_ctx);
! 			tuplestore_puttuple(tupstore, tuple);
! 		}
  	}
  
! 	/* clean up and return the tuplestore */
! 	tuplestore_donestoring(tupstore);
! 
! 	MemoryContextSwitchTo(oldcontext);
! 
! 	rsinfo->returnMode = SFRM_Materialize;
! 	rsinfo->setResult = tupstore;
! 	rsinfo->setDesc = tupdesc;
! 
! 	return (Datum) 0;
  }
  
  /*
*** src/backend/executor/nodeSubplan.c.orig	Mon Feb 26 20:11:25 2007
--- src/backend/executor/nodeSubplan.c	Wed Apr 25 19:22:50 2007
***************
*** 569,575 ****
  	TupleHashIterator hashiter;
  	TupleHashEntry entry;
  
! 	ResetTupleHashIterator(hashtable, &hashiter);
  	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
  	{
  		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
--- 569,575 ----
  	TupleHashIterator hashiter;
  	TupleHashEntry entry;
  
! 	InitTupleHashIterator(hashtable, &hashiter);
  	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
  	{
  		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
***************
*** 577,584 ****
--- 577,588 ----
  							   numCols, keyColIdx,
  							   hashtable->cur_eq_funcs,
  							   hashtable->tempcxt))
+ 		{
+ 			TermTupleHashIterator(&hashiter);
  			return true;
+ 		}
  	}
+ 	/* No TermTupleHashIterator call needed: the scan ran to completion */
  	return false;
  }
  
*** src/backend/nodes/tidbitmap.c.orig	Fri Jan  5 18:01:58 2007
--- src/backend/nodes/tidbitmap.c	Wed Apr 25 19:38:05 2007
***************
*** 907,913 ****
  		tbm_mark_page_lossy(tbm, page->blockno);
  
  		if (tbm->nentries <= tbm->maxentries)
! 			return;				/* we have done enough */
  
  		/*
  		 * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
--- 907,917 ----
  		tbm_mark_page_lossy(tbm, page->blockno);
  
  		if (tbm->nentries <= tbm->maxentries)
! 		{
! 			/* we have done enough */
! 			hash_seq_term(&status);
! 			break;
! 		}
  
  		/*
  		 * Note: tbm_mark_page_lossy may have inserted a lossy chunk into the
*** src/backend/utils/hash/dynahash.c.orig	Fri Jan  5 18:02:22 2007
--- src/backend/utils/hash/dynahash.c	Thu Apr 26 14:29:55 2007
***************
*** 63,68 ****
--- 63,69 ----
  
  #include "postgres.h"
  
+ #include "access/xact.h"
  #include "storage/shmem.h"
  #include "storage/spin.h"
  #include "utils/dynahash.h"
***************
*** 160,165 ****
--- 161,169 ----
  	char	   *tabname;		/* table name (for error messages) */
  	bool		isshared;		/* true if table is in shared memory */
  
+ 	/* freezing a shared table isn't allowed, so we can keep state here */
+ 	bool		frozen;			/* true = no more inserts allowed */
+ 
  	/* We keep local copies of these fixed values to reduce contention */
  	Size		keysize;		/* hash key length in bytes */
  	long		ssize;			/* segment size --- must be power of 2 */
***************
*** 195,200 ****
--- 199,207 ----
  static int	choose_nelem_alloc(Size entrysize);
  static bool init_htab(HTAB *hashp, long nelem);
  static void hash_corrupted(HTAB *hashp);
+ static void register_seq_scan(HTAB *hashp);
+ static void deregister_seq_scan(HTAB *hashp);
+ static bool has_seq_scans(HTAB *hashp);
  
  
  /*
***************
*** 356,361 ****
--- 363,370 ----
  					 errmsg("out of memory")));
  	}
  
+ 	hashp->frozen = false;
+ 
  	hdefault(hashp);
  
  	hctl = hashp->hctl;
***************
*** 898,903 ****
--- 907,916 ----
  			if (currBucket != NULL)
  				return (void *) ELEMENTKEY(currBucket);
  
+ 			/* disallow inserts if frozen */
+ 			if (hashp->frozen)
+ 				elog(ERROR, "cannot insert into a frozen hashtable");
+ 
  			currBucket = get_hash_entry(hashp);
  			if (currBucket == NULL)
  			{
***************
*** 925,934 ****
  
  			/* caller is expected to fill the data field on return */
  
! 			/* Check if it is time to split a bucket */
! 			/* Can't split if running in partitioned mode */
  			if (!IS_PARTITIONED(hctl) &&
! 			 hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor)
  			{
  				/*
  				 * NOTE: failure to expand table is not a fatal error, it just
--- 938,952 ----
  
  			/* caller is expected to fill the data field on return */
  
! 			/*
! 			 * Check if it is time to split a bucket.  Can't split if running
! 			 * in partitioned mode, nor if table is the subject of any active
! 			 * hash_seq_search scans.  Strange order of these tests is to try
! 			 * to check cheaper conditions first.
! 			 */
  			if (!IS_PARTITIONED(hctl) &&
! 				hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
! 				!has_seq_scans(hashp))
  			{
  				/*
  				 * NOTE: failure to expand table is not a fatal error, it just
***************
*** 1001,1018 ****
  }
  
  /*
!  * hash_seq_init/_search
   *			Sequentially search through hash table and return
   *			all the elements one by one, return NULL when no more.
   *
   * NOTE: caller may delete the returned element before continuing the scan.
   * However, deleting any other element while the scan is in progress is
   * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
   * if elements are added to the table while the scan is in progress, it is
   * unspecified whether they will be visited by the scan or not.
   *
   * NOTE: to use this with a partitioned hashtable, caller had better hold
   * at least shared lock on all partitions of the table throughout the scan!
   */
  void
  hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
--- 1019,1048 ----
  }
  
  /*
!  * hash_seq_init/_search/_term
   *			Sequentially search through hash table and return
   *			all the elements one by one, return NULL when no more.
   *
+  * hash_seq_term should be called if and only if the scan is abandoned before
+  * completion; if hash_seq_search returns NULL then it has already done the
+  * end-of-scan cleanup.
+  *
   * NOTE: caller may delete the returned element before continuing the scan.
   * However, deleting any other element while the scan is in progress is
   * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
   * if elements are added to the table while the scan is in progress, it is
   * unspecified whether they will be visited by the scan or not.
   *
+  * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
+  * worry about hash_seq_term cleanup, if the hashtable is first locked against
+  * further insertions by calling hash_freeze.  This is used by nodeAgg.c,
+  * wherein it is inconvenient to track whether a scan is still open, and
+  * there's no possibility of further insertions after readout has begun.
+  *
   * NOTE: to use this with a partitioned hashtable, caller had better hold
   * at least shared lock on all partitions of the table throughout the scan!
+  * We can cope with insertions or deletions by our own backend, but *not*
+  * with concurrent insertions or deletions by another.
   */
  void
  hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
***************
*** 1020,1025 ****
--- 1050,1057 ----
  	status->hashp = hashp;
  	status->curBucket = 0;
  	status->curEntry = NULL;
+ 	if (!hashp->frozen)
+ 		register_seq_scan(hashp);
  }
  
  void *
***************
*** 1054,1060 ****
--- 1086,1095 ----
  	max_bucket = hctl->max_bucket;
  
  	if (curBucket > max_bucket)
+ 	{
+ 		hash_seq_term(status);
  		return NULL;			/* search is done */
+ 	}
  
  	/*
  	 * first find the right segment in the table directory.
***************
*** 1076,1081 ****
--- 1111,1117 ----
  		if (++curBucket > max_bucket)
  		{
  			status->curBucket = curBucket;
+ 			hash_seq_term(status);
  			return NULL;		/* search is done */
  		}
  		if (++segment_ndx >= ssize)
***************
*** 1094,1099 ****
--- 1130,1165 ----
  	return (void *) ELEMENTKEY(curElem);
  }
  
+ void
+ hash_seq_term(HASH_SEQ_STATUS *status)
+ {
+ 	if (!status->hashp->frozen)		/* frozen-table scans were never registered */
+ 		deregister_seq_scan(status->hashp);
+ }
+ 
+ /*
+  * hash_freeze
+  *			Freeze a hashtable against future insertions (deletions are
+  *			still allowed)
+  *
+  * The reason for doing this is that by preventing any more bucket splits,
+  * we no longer need to worry about registering hash_seq_search scans,
+  * and thus caller need not be careful about ensuring hash_seq_term gets
+  * called at the right times.
+  *
+  * Multiple calls to hash_freeze() are allowed.  It is an ERROR to freeze a
+  * shared table or one with active scans (their hash_seq_term would be a no-op).
+  */
+ void
+ hash_freeze(HTAB *hashp)
+ {
+ 	if (hashp->isshared)
+ 		elog(ERROR, "cannot freeze shared hashtable");
+ 	if (!hashp->frozen && has_seq_scans(hashp))
+ 		elog(ERROR, "cannot freeze hashtable with active scans");
+ 	hashp->frozen = true;
+ }
+ 
  
  /********************************* UTILITIES ************************/
  
***************
*** 1323,1326 ****
--- 1389,1525 ----
  	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
  		;
  	return i;
+ }
+ 
+ 
+ /************************* SEQ SCAN TRACKING ************************/
+ 
+ /*
+  * We track active hash_seq_search scans here.  The need for this mechanism
+  * comes from the fact that a scan will get confused if a bucket split occurs
+  * while it's in progress: it might visit entries twice, or even miss some
+  * entirely (if it's partway through the same bucket that splits).  Hence
+  * we want to inhibit bucket splits if there are any active scans on the
+  * table being inserted into.  This is a fairly rare case in current usage,
+  * so just postponing the split until the next insertion seems sufficient.
+  *
+  * Given present usages of the function, only a few scans are likely to be
+  * open concurrently; so a finite-size stack of open scans seems sufficient,
+  * and we don't worry that linear search is too slow.  Note that we do
+  * allow multiple scans of the same hashtable to be open concurrently.
+  *
+  * This mechanism can support concurrent scan and insertion in a shared
+  * hashtable if it's the same backend doing both.  It would fail otherwise,
+  * but locking reasons seem to preclude any such scenario anyway, so we don't
+  * worry.
+  *
+  * This arrangement is reasonably robust if a transient hashtable is deleted
+  * without notifying us.  The absolute worst case is we might inhibit splits
+  * in another table created later at exactly the same address.  We will give
+  * a warning at transaction end for reference leaks, so any bugs leading to
+  * lack of notification should be easy to catch.
+  */
+ 
+ #define MAX_SEQ_SCANS 100
+ 
+ static HTAB *seq_scan_tables[MAX_SEQ_SCANS];	/* tables being scanned */
+ static int	seq_scan_level[MAX_SEQ_SCANS];		/* subtransaction nest level */
+ static int	num_seq_scans = 0;
+ 
+ 
+ /* Register an active scan of hashp, tagged with current xact nest level */
+ static void
+ register_seq_scan(HTAB *hashp)
+ {
+ 	if (num_seq_scans >= MAX_SEQ_SCANS)
+ 		elog(ERROR, "too many active hash_seq_search scans");
+ 	seq_scan_tables[num_seq_scans] = hashp;
+ 	seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
+ 	num_seq_scans++;
+ }
+ 
+ /* Deregister one active scan of hashp; ERROR if none is registered */
+ static void
+ deregister_seq_scan(HTAB *hashp)
+ {
+ 	int		i;
+ 
+ 	/* Search backward since it's most likely at the stack top */
+ 	for (i = num_seq_scans - 1; i >= 0; i--)
+ 	{
+ 		if (seq_scan_tables[i] == hashp)	/* found: overwrite with last entry */
+ 		{
+ 			seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+ 			seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+ 			num_seq_scans--;
+ 			return;
+ 		}
+ 	}
+ 	elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
+ 		 hashp->tabname);
+ }
+ 
+ /* Return true if at least one registered scan is currently open on hashp */
+ static bool
+ has_seq_scans(HTAB *hashp)
+ {
+ 	int		i;
+ 
+ 	for (i = 0; i < num_seq_scans; i++)
+ 	{
+ 		if (seq_scan_tables[i] == hashp)
+ 			return true;
+ 	}
+ 	return false;
+ }
+ 
+ /* End of top-level xact: forget all open scans, with a WARNING each on commit */
+ void
+ AtEOXact_HashTables(bool isCommit)
+ {
+ 	/*
+ 	 * During abort cleanup, open scans are expected; just silently clean 'em
+ 	 * out.  An open scan at commit means someone forgot a hash_seq_term()
+ 	 * call, so complain.
+ 	 *
+ 	 * Note: it's tempting to try to print the tabname here, but refrain for
+ 	 * fear of touching deallocated memory.  This isn't a user-facing message
+ 	 * anyway, so it needn't be pretty.
+ 	 */
+ 	if (isCommit)
+ 	{
+ 		int		i;
+ 
+ 		for (i = 0; i < num_seq_scans; i++)
+ 		{
+ 			elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+ 				 seq_scan_tables[i]);
+ 		}
+ 	}
+ 	num_seq_scans = 0;
+ }
+ 
+ /* End of subxact: drop scans registered at nestDepth or deeper */
+ void
+ AtEOSubXact_HashTables(bool isCommit, int nestDepth)
+ {
+ 	int		i;
+ 
+ 	/*
+ 	 * Search backward so the entry swapped into slot i was already examined.
+ 	 * Note we must check all entries, not only those at the end of the array,
+ 	 * because the swap-with-last deletion technique doesn't keep them in order.
+ 	 */
+ 	for (i = num_seq_scans - 1; i >= 0; i--)
+ 	{
+ 		if (seq_scan_level[i] >= nestDepth)
+ 		{
+ 			if (isCommit)
+ 				elog(WARNING, "leaked hash_seq_search scan for hash table %p",
+ 					 seq_scan_tables[i]);
+ 			seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
+ 			seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
+ 			num_seq_scans--;
+ 		}
+ 	}
  }
*** src/backend/utils/mmgr/portalmem.c.orig	Thu Apr 12 14:21:37 2007
--- src/backend/utils/mmgr/portalmem.c	Thu Apr 26 15:27:57 2007
***************
*** 22,28 ****
  #include "access/xact.h"
  #include "catalog/pg_type.h"
  #include "commands/portalcmds.h"
- #include "funcapi.h"
  #include "miscadmin.h"
  #include "utils/builtins.h"
  #include "utils/memutils.h"
--- 22,27 ----
***************
*** 621,627 ****
  		/* Zap all non-holdable portals */
  		PortalDrop(portal, true);
  
! 		/* Restart the iteration */
  		hash_seq_init(&status, PortalHashTable);
  	}
  }
--- 620,628 ----
  		/* Zap all non-holdable portals */
  		PortalDrop(portal, true);
  
! 		/* Restart the iteration in case that led to other drops */
! 		/* XXX is this really necessary? */
! 		hash_seq_term(&status);
  		hash_seq_init(&status, PortalHashTable);
  	}
  }
***************
*** 858,936 ****
  Datum
  pg_cursor(PG_FUNCTION_ARGS)
  {
! 	FuncCallContext *funcctx;
! 	HASH_SEQ_STATUS *hash_seq;
  	PortalHashEnt *hentry;
  
! 	/* stuff done only on the first call of the function */
! 	if (SRF_IS_FIRSTCALL())
! 	{
! 		MemoryContext oldcontext;
! 		TupleDesc	tupdesc;
! 
! 		/* create a function context for cross-call persistence */
! 		funcctx = SRF_FIRSTCALL_INIT();
! 
! 		/*
! 		 * switch to memory context appropriate for multiple function calls
! 		 */
! 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
! 
! 		if (PortalHashTable)
! 		{
! 			hash_seq = (HASH_SEQ_STATUS *) palloc(sizeof(HASH_SEQ_STATUS));
! 			hash_seq_init(hash_seq, PortalHashTable);
! 			funcctx->user_fctx = (void *) hash_seq;
! 		}
! 		else
! 			funcctx->user_fctx = NULL;
! 
! 		/*
! 		 * build tupdesc for result tuples. This must match the definition of
! 		 * the pg_cursors view in system_views.sql
! 		 */
! 		tupdesc = CreateTemplateTupleDesc(6, false);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
! 						   TEXTOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
! 						   TEXTOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
! 						   BOOLOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
! 						   BOOLOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
! 						   BOOLOID, -1, 0);
! 		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
! 						   TIMESTAMPTZOID, -1, 0);
! 
! 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
! 		MemoryContextSwitchTo(oldcontext);
! 	}
  
! 	/* stuff done on every call of the function */
! 	funcctx = SRF_PERCALL_SETUP();
! 	hash_seq = (HASH_SEQ_STATUS *) funcctx->user_fctx;
! 
! 	/* if the hash table is uninitialized, we're done */
! 	if (hash_seq == NULL)
! 		SRF_RETURN_DONE(funcctx);
  
! 	/* loop until we find a visible portal or hit the end of the list */
! 	while ((hentry = hash_seq_search(hash_seq)) != NULL)
! 	{
! 		if (hentry->portal->visible)
! 			break;
! 	}
  
! 	if (hentry)
  	{
! 		Portal		portal;
! 		Datum		result;
  		HeapTuple	tuple;
  		Datum		values[6];
  		bool		nulls[6];
  
! 		portal = hentry->portal;
  		MemSet(nulls, 0, sizeof(nulls));
  
  		values[0] = DirectFunctionCall1(textin, CStringGetDatum(portal->name));
--- 859,926 ----
  Datum
  pg_cursor(PG_FUNCTION_ARGS)
  {
! 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
! 	TupleDesc	tupdesc;
! 	Tuplestorestate *tupstore;
! 	MemoryContext per_query_ctx;
! 	MemoryContext oldcontext;
! 	HASH_SEQ_STATUS hash_seq;
  	PortalHashEnt *hentry;
  
! 	/* check to see if caller supports us returning a tuplestore */
! 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
! 		ereport(ERROR,
! 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 				 errmsg("set-valued function called in context that cannot accept a set")));
! 	if (!(rsinfo->allowedModes & SFRM_Materialize))
! 		ereport(ERROR,
! 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 				 errmsg("materialize mode required, but it is not " \
! 						"allowed in this context")));
! 
! 	/* need to build tuplestore in query context */
! 	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
! 	oldcontext = MemoryContextSwitchTo(per_query_ctx);
  
! 	/*
! 	 * build tupdesc for result tuples. This must match the definition of
! 	 * the pg_cursors view in system_views.sql
! 	 */
! 	tupdesc = CreateTemplateTupleDesc(6, false);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
! 					   TEXTOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "statement",
! 					   TEXTOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_holdable",
! 					   BOOLOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_binary",
! 					   BOOLOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_scrollable",
! 					   BOOLOID, -1, 0);
! 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "creation_time",
! 					   TIMESTAMPTZOID, -1, 0);
  
! 	/*
! 	 * We put all the tuples into a tuplestore in one scan of the hashtable.
! 	 * This avoids any issue of the hashtable possibly changing between calls.
! 	 */
! 	tupstore = tuplestore_begin_heap(true, false, work_mem);
  
! 	hash_seq_init(&hash_seq, PortalHashTable);
! 	while ((hentry = hash_seq_search(&hash_seq)) != NULL)
  	{
! 		Portal		portal = hentry->portal;
  		HeapTuple	tuple;
  		Datum		values[6];
  		bool		nulls[6];
  
! 		/* report only "visible" entries */
! 		if (!portal->visible)
! 			continue;
! 
! 		/* generate junk in short-term context */
! 		MemoryContextSwitchTo(oldcontext);
! 
  		MemSet(nulls, 0, sizeof(nulls));
  
  		values[0] = DirectFunctionCall1(textin, CStringGetDatum(portal->name));
***************
*** 944,953 ****
  		values[4] = BoolGetDatum(portal->cursorOptions & CURSOR_OPT_SCROLL);
  		values[5] = TimestampTzGetDatum(portal->creation_time);
  
! 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
! 		result = HeapTupleGetDatum(tuple);
! 		SRF_RETURN_NEXT(funcctx, result);
  	}
  
! 	SRF_RETURN_DONE(funcctx);
  }
--- 934,954 ----
  		values[4] = BoolGetDatum(portal->cursorOptions & CURSOR_OPT_SCROLL);
  		values[5] = TimestampTzGetDatum(portal->creation_time);
  
! 		tuple = heap_form_tuple(tupdesc, values, nulls);
! 
! 		/* switch to appropriate context while storing the tuple */
! 		MemoryContextSwitchTo(per_query_ctx);
! 		tuplestore_puttuple(tupstore, tuple);
  	}
  
! 	/* clean up and return the tuplestore */
! 	tuplestore_donestoring(tupstore);
! 
! 	MemoryContextSwitchTo(oldcontext);
! 
! 	rsinfo->returnMode = SFRM_Materialize;
! 	rsinfo->setResult = tupstore;
! 	rsinfo->setDesc = tupdesc;
! 
! 	return (Datum) 0;
  }
*** src/include/nodes/execnodes.h.orig	Tue Mar 27 19:21:12 2007
--- src/include/nodes/execnodes.h	Wed Apr 25 19:21:40 2007
***************
*** 408,415 ****
  
  typedef HASH_SEQ_STATUS TupleHashIterator;
  
! #define ResetTupleHashIterator(htable, iter) \
  	hash_seq_init(iter, (htable)->hashtab)
  #define ScanTupleHashTable(iter) \
  	((TupleHashEntry) hash_seq_search(iter))
  
--- 408,427 ----
  
  typedef HASH_SEQ_STATUS TupleHashIterator;
  
! /*
!  * InitTupleHashIterator starts a read/write scan; call TermTupleHashIterator
!  * if it is abandoned early.  ResetTupleHashIterator first freezes the table
!  * against insertions, so its scans need no explicit termination.
!  */
! #define InitTupleHashIterator(htable, iter) \
  	hash_seq_init(iter, (htable)->hashtab)
+ #define TermTupleHashIterator(iter) \
+ 	hash_seq_term(iter)
+ #define ResetTupleHashIterator(htable, iter) \
+ 	do { \
+ 		hash_freeze((htable)->hashtab); \
+ 		hash_seq_init(iter, (htable)->hashtab); \
+ 	} while (0)
  #define ScanTupleHashTable(iter) \
  	((TupleHashEntry) hash_seq_search(iter))
  
*** src/include/utils/hsearch.h.orig	Fri Jan  5 18:02:59 2007
--- src/include/utils/hsearch.h	Wed Apr 25 20:29:18 2007
***************
*** 130,138 ****
--- 130,142 ----
  extern long hash_get_num_entries(HTAB *hashp);
  extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
  extern void *hash_seq_search(HASH_SEQ_STATUS *status);
+ extern void hash_seq_term(HASH_SEQ_STATUS *status);
+ extern void hash_freeze(HTAB *hashp);
  extern Size hash_estimate_size(long num_entries, Size entrysize);
  extern long hash_select_dirsize(long num_entries);
  extern Size hash_get_shared_size(HASHCTL *info, int flags);
+ extern void AtEOXact_HashTables(bool isCommit);
+ extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);
  
  /*
   * prototypes for functions in hashfn.c