diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 510d1c5..6455864 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -296,9 +296,18 @@ typedef struct AggHashEntryData *AggHashEntry;
 
 typedef struct AggHashEntryData
 {
-	TupleHashEntryData shared;	/* common header for hash table entries */
-	/* per-aggregate transition status array - must be last! */
+	/* One entry of the HashAgg hash table, i.e. the state of one group. */
+	/* next entry in the same bucket (singly-linked chain), or NULL */
+	AggHashEntry next;
+
+	/* hash of the group keys, kept so that buckets can be rebuilt cheaply */
+	uint32	hashvalue;
+
+	/* copy of an input tuple with the group keys (stored after the entry) */
+	MinimalTuple tuple;
+	/* per-aggregate transition status array - must be last! */
 	AggStatePerGroupData pergroup[1];	/* VARIABLE LENGTH ARRAY */
+
 }	AggHashEntryData;	/* VARIABLE LENGTH STRUCT */
 
 
@@ -321,7 +330,7 @@ static void finalize_aggregate(AggState *aggstate,
 				   Datum *resultVal, bool *resultIsNull);
 static Bitmapset *find_unaggregated_cols(AggState *aggstate);
 static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
-static void build_hash_table(AggState *aggstate);
+static void build_hash_table(AggState *aggstate, Size tuple_width);
 static AggHashEntry lookup_hash_entry(AggState *aggstate,
 				  TupleTableSlot *inputslot);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
@@ -329,6 +338,86 @@ static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
 
+static uint32 compute_hash_value(AggState *aggstate, TupleTableSlot *slot);
+static uint32 compute_bucket(AggState *aggstate, uint32 hashvalue);
+static bool groups_match(AggState *aggstate, TupleTableSlot *slot, AggHashEntry entry);
+static void increase_nbuckets(AggState *aggstate);
+static char *chunk_alloc(AggHashTable htab, int size);
+static void reset_hash_table(AggHashTable htab);
+/* iteration over all entries of the hash table, chunk by chunk */
+static void IteratorReset(AggHashTable htab);
+static AggHashEntry IteratorGetNext(AggHashTable htab);
+
+/*
+ * Size of the chunks used for dense allocation of hash entries. It has
+ * to be >8kB, because the allocation set implementation (aset.c) keeps
+ * freed requests of up to 8kB on freelists - exactly what we don't want
+ * here. We want the chunk memory to be released once it's not needed
+ * (so it can be reused for aggregate states and so on).
+ *
+ * 16kB seems like a good default value.
+ */
+#define HASH_CHUNK_SIZE			(16*1024L)
+
+typedef struct HashChunkData
+{
+	int			ntuples;	/* number of entries placed into this chunk */
+	Size		maxlen;		/* size of the data[] buffer, in bytes */
+	Size		used;		/* number of buffer bytes already handed out */
+	/* chunks of a hash table form a singly-linked list (see chunk_alloc) */
+	struct HashChunkData   *next;	/* next chunk in the list, or NULL */
+	/* allocated as offsetof(HashChunkData, data) + maxlen bytes */
+	char		data[1];	/* start of the buffer (variable length) */
+} HashChunkData;
+
+typedef struct HashChunkData* HashChunk;
+
+/*
+ * A simple hashtable, storing the data densely into larger chunks.
+ * Originally, HashAgg used dynahash (through methods in execGrouping.c)
+ * but that does not allow removing the entries and freeing memory. So
+ * the approach already used in nodeHash.c is used here too (and
+ * wrapped a bit more nicely).
+ *
+ * The hash entries (containing the per-group data) and tuples (with
+ * keys of the group) are interleaved, i.e. the entry is always stored
+ * first, then the tuple (in the MinimalTuple format). The entries are
+ * always fixed size (the aggregate state is either passed by value and
+ * stored inline, or passed by reference and stored in regularly
+ * palloced memory), the tuples are of arbitrary size.
+ */
+typedef struct AggHashTableData
+{
+
+	int	nentries;		/* number of entries (groups) in the table */
+	int	nbuckets;		/* current number of buckets (power of two) */
+	int	nbuckets_max;	/* limit for nbuckets (see build_hash_table) */
+
+	/* items copied from the TupleHashTable, because we still need them */
+	MemoryContext	tmpctx;		/* short-lived memory context (hash/eq funcs) */
+	AttrNumber 	   *keyColIdx;	/* attr numbers of key columns */
+	int				numCols;	/* number of key (grouping) columns */
+	TupleTableSlot *slot;		/* slot for groups_match (created lazily) */
+	Size			entrysize;	/* MAXALIGNed size of an entry (no tuple) */
+
+	MemoryContext	htabctx;	/* context for the buckets and the chunks */
+
+	/* array of nbuckets bucket heads (chains linked through entry->next) */
+	AggHashEntry   *buckets;
+
+	/*
+	 * Used for iterating through the hash table - it keeps track of the
+	 * current chunk, and entry within the chunk. Use IteratorReset and
+	 * IteratorGetNext to initialize and advance the iterator.
+	 */
+	HashChunk		cur_chunk;
+	AggHashEntry	cur_entry;
+
+	/* list of chunks with dense-packed entries / minimal tuples */
+	HashChunk		chunks_hash;
+
+} AggHashTableData;
+
 
 /*
  * Initialize all aggregates for a new group of input values.
@@ -928,26 +1017,111 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
  * The hash table always lives in the aggcontext memory context.
  */
 static void
-build_hash_table(AggState *aggstate)
+build_hash_table(AggState *aggstate, Size tuple_width)
 {
 	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
-	MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
-	Size		entrysize;
+	Size		entrysize;	/* size of entry in the hash table */
+	Size		groupsize;	/* space used by the group (includes bucket) */
+	AggHashTable	htab;
+	/* tuple_width is the estimated width (in bytes) of an input tuple */
+	/* we assume 1024 buckets (i.e. 8kB of memory) is minimum */
+	int nbuckets     = 1024;
+	int nbuckets_max = 1024;
 
 	Assert(node->aggstrategy == AGG_HASHED);
 	Assert(node->numGroups > 0);
 
-	entrysize = sizeof(AggHashEntryData) +
-		(aggstate->numaggs - 1) * sizeof(AggStatePerGroupData);
-
-	aggstate->hashtable = BuildTupleHashTable(node->numCols,
-											  node->grpColIdx,
-											  aggstate->eqfunctions,
-											  aggstate->hashfunctions,
-											  node->numGroups,
-											  entrysize,
-											  aggstate->aggcontext,
-											  tmpmem);
+	/*
+	 * Compute size of the hash table entry (this is actual size, but it
+	 * does not include the MinimalTuple size, with values of the keys
+	 * for the group). There's only a pointer to the minimal tuple.
+	 */
+	entrysize = MAXALIGN(sizeof(AggHashEntryData) +
+				(aggstate->numaggs - 1) * sizeof(AggStatePerGroupData));
+
+	/*
+	 * Estimate the size of the group, so that we can estimate how many
+	 * of them fit into work_mem, and thus estimate what is the reasonable
+	 * max number of buckets that we can use. To do that we add the entry
+	 * size, a bucket (because we're shooting for <1 load factor), and
+	 * estimated tuple width (because we'll keep the first tuple for each
+	 * group because of group key values).
+	 *
+	 * XXX This does not include size of the aggregate states, passed by
+	 *     reference. First, we don't know how to determine that. However,
+	 *     if the states are small it won't make much difference and if
+	 *     the states get large the memory required for the buckets is
+	 *     going to be much less important.
+	 */
+	groupsize = entrysize + sizeof(AggHashEntry)
+				+ sizeof(MinimalTupleData) + tuple_width;
+
+	/*
+	 * Determine the maximum number of buckets that fit into work_mem (along
+	 * with the entries).
+	 *
+	 * This assumes all the space is used by AggHashEntries, but many aggregates
+	 * are keeping state separate (e.g. as a "pass by reference" Datums), which
+	 * results in nbuckets_max values higher than possible in practice. But we
+	 * don't know that at this point, and we don't need to worry too much about
+	 * it because those aggregates do it to handle states that are significantly
+	 * larger than 8B, which makes the 8B per-bucket negligible.
+	 *
+	 * The doubling also stops before the bucket array (a single palloc'd array)
+	 * could exceed MaxAllocSize, which also keeps nbuckets_max within int range.
+	 *
+	 * XXX We may re-evaluate this over time, as we'll know how many entries are
+	 *     there, and thus what is the average size of aggregate state. That is,
+	 *     as the state size grows, we may decrease the number of buckets.
+	 */
+	while ((nbuckets_max <= (int) (MaxAllocSize / sizeof(AggHashEntry) / 2)) &&
+		   (nbuckets_max * groupsize <= work_mem * 1024L))
+		nbuckets_max *= 2;
+
+	/*
+	 * Update the initial number of buckets to match expected number of groups,
+	 * but don't grow over nbuckets_max because in that case we'll start with
+	 * the batching anyway.
+	 */
+	while ((nbuckets < node->numGroups) && (nbuckets < nbuckets_max))
+		nbuckets *= 2;
+
+	/*
+	 * XXX When batching, we might use (numGroups / nbuckets) as a starting
+	 * nbatch value, but maybe we can start with nbatch=1 with the assumption
+	 * that multiple tuples will be 'compressed' into the group (and thus
+	 * we'll write less data in total).
+	 */
+
+	htab = (AggHashTable)MemoryContextAllocZero(aggstate->aggcontext,
+											sizeof(AggHashTableData));
+
+	/* separate memory context for the buckets and the chunks of entries */
+	htab->htabctx = AllocSetContextCreate(aggstate->aggcontext,
+											"HashAggHashTable",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE);
+
+	/* buckets are just pointers to AggHashEntryData structures */
+	htab->buckets = (AggHashEntry*)MemoryContextAllocZero(htab->htabctx,
+									nbuckets * sizeof(AggHashEntry));
+
+	/* copy the column IDs from the node */
+	htab->keyColIdx = node->grpColIdx;
+
+	/* we'll use the per-tuple memory context for the hash/eq functions */
+	htab->tmpctx = aggstate->tmpcontext->ecxt_per_tuple_memory;
+
+	htab->nbuckets = nbuckets;
+	htab->nbuckets_max = nbuckets_max;
+	htab->nentries = 0;
+	htab->slot = NULL;
+	htab->numCols = node->numCols;
+	htab->entrysize = entrysize;
+
+	aggstate->hashtable = htab;
+
 }
 
 /*
@@ -1026,40 +1200,77 @@ hash_agg_entry_size(int numAggs)
 static AggHashEntry
 lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 {
-	TupleTableSlot *hashslot = aggstate->hashslot;
-	ListCell   *l;
-	AggHashEntry entry;
-	bool		isnew;
 
-	/* if first time through, initialize hashslot by cloning input slot */
-	if (hashslot->tts_tupleDescriptor == NULL)
+	AggHashEntry entry = NULL;
+	uint32		hashvalue;
+	uint32		bucketno;
+	MinimalTuple mintuple;
+
+	hashvalue = compute_hash_value(aggstate, inputslot);
+	bucketno = compute_bucket(aggstate, hashvalue);
+
+	entry = aggstate->hashtable->buckets[bucketno];
+
+	/* walk the bucket chain, looking for an entry of the same group */
+	while (entry != NULL)
 	{
-		ExecSetSlotDescriptor(hashslot, inputslot->tts_tupleDescriptor);
-		/* Make sure all unused columns are NULLs */
-		ExecStoreAllNullTuple(hashslot);
+
+		/* first check the hashes, only then check the keys (if hashes match) */
+		if ((entry->hashvalue == hashvalue) && (groups_match(aggstate, inputslot, entry)))
+			break;
+
+		/* not the same group, continue with the next entry in the chain */
+		entry = entry->next;
 	}
 
-	/* transfer just the needed columns into hashslot */
-	slot_getsomeattrs(inputslot, linitial_int(aggstate->hash_needed));
-	foreach(l, aggstate->hash_needed)
+	/* There's no matching entry in the bucket, so create a new one and
+	 * set up both the per-aggregate data and the MinimalTuple containing
+	 * keys for the group columns. */
+	if (entry == NULL)
 	{
-		int			varNumber = lfirst_int(l) - 1;
 
-		hashslot->tts_values[varNumber] = inputslot->tts_values[varNumber];
-		hashslot->tts_isnull[varNumber] = inputslot->tts_isnull[varNumber];
-	}
+		MemoryContext old;
 
-	/* find or create the hashtable entry using the filtered tuple */
-	entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
-												hashslot,
-												&isnew);
+		/* only a reference to the mintuple - we'll copy it into a chunk */
+		mintuple = ExecFetchSlotMinimalTuple(inputslot);
+
+		/* NOTE chunk_alloc allocates from the hash table context (htabctx)
+		 * explicitly, so this switch does not affect where the entry lives */
+		old = MemoryContextSwitchTo(aggstate->aggcontext);
+
+		/* we need enough space for the entry and tuple with key values */
+		entry = (AggHashEntry) chunk_alloc(aggstate->hashtable,
+							aggstate->hashtable->entrysize + mintuple->t_len);
+
+		entry->hashvalue = hashvalue;
+
+		/* add to the proper bucket (at the head of the chain) */
+		entry->next = aggstate->hashtable->buckets[bucketno];
+		aggstate->hashtable->buckets[bucketno] = entry;
+
+		/* the tuple is placed right after the entry (maxaligned) */
+		entry->tuple = (MinimalTuple)((char*)entry + aggstate->hashtable->entrysize);
+
+		/*
+		 * FIXME This seems to copy all the data, including columns that are not part
+		 * of the key (i.e. are there only as inputs for the aggregates - that may be
+		 * quite wasteful when there are many aggregates / the values are long etc.)
+		 */
+		memcpy(entry->tuple, mintuple, mintuple->t_len);
+
+		MemoryContextSwitchTo(old);
 
-	if (isnew)
-	{
 		/* initialize aggregates for new tuple group */
 		initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
+
+		aggstate->hashtable->nentries += 1;
+
 	}
 
+	/* once we exceed 1 entry / bucket, try to double the number of buckets */
+	if (aggstate->hashtable->nentries > aggstate->hashtable->nbuckets)
+		increase_nbuckets(aggstate);
+
 	return entry;
 }
 
@@ -1363,8 +1574,10 @@ agg_fill_hash_table(AggState *aggstate)
 	}
 
 	aggstate->table_filled = true;
-	/* Initialize to walk the hash table */
-	ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+
+	/* Initialize for iteration through the table (first bucket / entry) */
+	IteratorReset(aggstate->hashtable);
+
 }
 
 /*
@@ -1381,6 +1594,7 @@ agg_retrieve_hash_table(AggState *aggstate)
 	AggHashEntry entry;
 	TupleTableSlot *firstSlot;
 	int			aggno;
+	AggHashTable	htab;
 
 	/*
 	 * get state info from node
@@ -1391,23 +1605,11 @@ agg_retrieve_hash_table(AggState *aggstate)
 	aggnulls = econtext->ecxt_aggnulls;
 	peragg = aggstate->peragg;
 	firstSlot = aggstate->ss.ss_ScanTupleSlot;
+	htab = aggstate->hashtable;
 
-	/*
-	 * We loop retrieving groups until we find one satisfying
-	 * aggstate->ss.ps.qual
-	 */
-	while (!aggstate->agg_done)
+	/* loop over entries in buckets */
+	while ((entry = IteratorGetNext(htab)) != NULL)
 	{
-		/*
-		 * Find the next entry in the hash table
-		 */
-		entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
-		if (entry == NULL)
-		{
-			/* No more entries in hashtable, so done */
-			aggstate->agg_done = TRUE;
-			return NULL;
-		}
 
 		/*
 		 * Clear the per-output-tuple context for each group
@@ -1419,19 +1621,19 @@ agg_retrieve_hash_table(AggState *aggstate)
 		ResetExprContext(econtext);
 
 		/*
-		 * Store the copied first input tuple in the tuple table slot reserved
-		 * for it, so that it can be used in ExecProject.
-		 */
-		ExecStoreMinimalTuple(entry->shared.firstTuple,
-							  firstSlot,
-							  false);
+		* Store the copied first input tuple in the tuple table slot reserved
+		* for it, so that it can be used in ExecProject.
+		*/
+		ExecStoreMinimalTuple(entry->tuple,
+							firstSlot,
+							false);
 
 		pergroup = entry->pergroup;
 
 		/*
-		 * Finalize each aggregate calculation, and stash results in the
-		 * per-output-tuple context.
-		 */
+		* Finalize each aggregate calculation, and stash results in the
+		* per-output-tuple context.
+		*/
 		for (aggno = 0; aggno < aggstate->numaggs; aggno++)
 		{
 			AggStatePerAgg peraggstate = &peragg[aggno];
@@ -1439,25 +1641,25 @@ agg_retrieve_hash_table(AggState *aggstate)
 
 			Assert(peraggstate->numSortCols == 0);
 			finalize_aggregate(aggstate, peraggstate, pergroupstate,
-							   &aggvalues[aggno], &aggnulls[aggno]);
+							&aggvalues[aggno], &aggnulls[aggno]);
 		}
 
 		/*
-		 * Use the representative input tuple for any references to
-		 * non-aggregated input columns in the qual and tlist.
-		 */
+		* Use the representative input tuple for any references to
+		* non-aggregated input columns in the qual and tlist.
+		*/
 		econtext->ecxt_outertuple = firstSlot;
 
 		/*
-		 * Check the qual (HAVING clause); if the group does not match, ignore
-		 * it and loop back to try to process another group.
-		 */
+		* Check the qual (HAVING clause); if the group does not match, ignore
+		* it and loop back to try to process another group.
+		*/
 		if (ExecQual(aggstate->ss.ps.qual, econtext, false))
 		{
 			/*
-			 * Form and return a projection tuple using the aggregate results
-			 * and the representative input tuple.
-			 */
+			* Form and return a projection tuple using the aggregate results
+			* and the representative input tuple.
+			*/
 			TupleTableSlot *result;
 			ExprDoneCond isDone;
 
@@ -1472,8 +1674,11 @@ agg_retrieve_hash_table(AggState *aggstate)
 		}
 		else
 			InstrCountFiltered1(aggstate, 1);
+
 	}
 
+	aggstate->agg_done = true;
+
 	/* No more groups */
 	return NULL;
 }
@@ -1515,7 +1720,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->agg_done = false;
 	aggstate->pergroup = NULL;
 	aggstate->grp_firstTuple = NULL;
-	aggstate->hashtable = NULL;
 
 	/*
 	 * Create expression contexts.  We need two, one for per-input-tuple
@@ -1546,7 +1750,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 */
 	ExecInitScanTupleSlot(estate, &aggstate->ss);
 	ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
-	aggstate->hashslot = ExecInitExtraTupleSlot(estate);
+
+	/* FIXME maybe we could reuse this in groups_match for better efficiency (?) */
+	// aggstate->hashslot = ExecInitExtraTupleSlot(estate);
 
 	/*
 	 * initialize child expressions
@@ -1636,7 +1842,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 
 	if (node->aggstrategy == AGG_HASHED)
 	{
-		build_hash_table(aggstate);
+		build_hash_table(aggstate, outerPlan->plan_width);
 		aggstate->table_filled = false;
 		/* Compute the columns we actually need to hash on */
 		aggstate->hash_needed = find_hash_columns(aggstate);
@@ -2073,7 +2279,7 @@ ExecReScanAgg(AggState *node)
 		 */
 		if (node->ss.ps.lefttree->chgParam == NULL)
 		{
-			ResetTupleHashIterator(node->hashtable, &node->hashiter);
+			IteratorReset(node->hashtable);
 			return;
 		}
 	}
@@ -2112,8 +2318,9 @@ ExecReScanAgg(AggState *node)
 
 	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 	{
+		Plan * outerPlan = outerPlan((Agg *) node->ss.ps.plan);
 		/* Rebuild an empty hash table */
-		build_hash_table(node);
+		build_hash_table(node, outerPlan->plan_width);
 		node->table_filled = false;
 	}
 	else
@@ -2269,3 +2476,385 @@ aggregate_dummy(PG_FUNCTION_ARGS)
 		 fcinfo->flinfo->fn_oid);
 	return (Datum) 0;			/* keep compiler quiet */
 }
+
+/*
+ * compute_hash_value
+ *		Hash the grouping columns of the tuple in 'slot' (same scheme as
+ *		TupleHashTableHash, minus the input-slot indirection).
+ */
+static uint32
+compute_hash_value(AggState * aggstate, TupleTableSlot * slot)
+{
+	AggHashTable htab = aggstate->hashtable;
+	FmgrInfo   *hashfn = aggstate->hashfunctions;
+	MemoryContext callerctx;
+	uint32		result = 0;
+	int			keyno;
+
+	/*
+	 * Run the hash functions in the short-lived context, starting from
+	 * a clean slate.
+	 *
+	 * FIXME is it really OK to reset the per-tuple context here?
+	 */
+	MemoryContextReset(htab->tmpctx);
+	callerctx = MemoryContextSwitchTo(htab->tmpctx);
+
+	/* only the grouping columns contribute to the hash */
+	for (keyno = 0; keyno < htab->numCols; keyno++)
+	{
+		bool		isnull;
+		Datum		value;
+
+		/* rotate the accumulated hash left by one bit at each step */
+		if (result & 0x80000000)
+			result = (result << 1) | 1;
+		else
+			result = result << 1;
+
+		value = slot_getattr(slot, htab->keyColIdx[keyno], &isnull);
+
+		/* a NULL key contributes nothing (hash key 0) */
+		if (isnull)
+			continue;
+
+		result ^= DatumGetUInt32(FunctionCall1(&hashfn[keyno], value));
+	}
+
+	MemoryContextSwitchTo(callerctx);
+
+	return result;
+
+}
+
+/* Maps a hash value to the index of its bucket. This is a plain bitmask,
+ * so nbuckets has to be a power of two (same principle as in nodeHash.c). */
+static uint32
+compute_bucket(AggState * aggstate, uint32 hashvalue)
+{
+	uint32		mask = (uint32) (aggstate->hashtable->nbuckets - 1);
+
+	return hashvalue & mask;
+}
+
+/*
+ * groups_match
+ *		Does the tuple in 'slot' have the same group keys as 'entry'? Uses the
+ *		equality functions, so compare the (much cheaper) hashes first.
+ */
+static bool
+groups_match(AggState * aggstate, TupleTableSlot *slot, AggHashEntry entry)
+{
+	AggHashTable htab = aggstate->hashtable;
+	FmgrInfo   *eqfn = aggstate->eqfunctions;
+	MemoryContext savedctx;
+	bool		matches = true;
+	int			keyno;
+
+	/*
+	 * The stored minimal tuple is accessed through a slot, created (in
+	 * aggcontext) the first time it's needed.
+	 *
+	 * XXX Do we really need this slot gymnastics? Can't we read the minimal
+	 * tuple directly? We still call ExecStoreMinimalTuple every time.
+	 */
+	if (htab->slot == NULL)
+	{
+		TupleDesc	desc;
+
+		savedctx = MemoryContextSwitchTo(aggstate->aggcontext);
+		desc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
+		htab->slot = MakeSingleTupleTableSlot(desc);
+		MemoryContextSwitchTo(savedctx);
+	}
+
+	/*
+	 * Evaluate the equality functions in the (freshly reset) temp context.
+	 * FIXME is it really OK to reset the per-tuple memory context here?
+	 */
+	MemoryContextReset(htab->tmpctx);
+	savedctx = MemoryContextSwitchTo(htab->tmpctx);
+
+	ExecStoreMinimalTuple(entry->tuple, htab->slot, false);
+
+	/*
+	 * A match requires all the keys to be equal, but a mismatch can be
+	 * reported as soon as one key differs. Walk the keys from the last
+	 * one (least significant sort key), as that's the one most likely to
+	 * differ if we are dealing with sorted input.
+	 */
+	for (keyno = htab->numCols - 1; keyno >= 0; keyno--)
+	{
+		AttrNumber	attno = htab->keyColIdx[keyno];
+		Datum		newval;
+		Datum		oldval;
+		bool		newnull;
+		bool		oldnull;
+
+		newval = slot_getattr(slot, attno, &newnull);
+		oldval = slot_getattr(htab->slot, attno, &oldnull);
+
+		/* two NULLs are considered equal, nothing more to check */
+		if (newnull && oldnull)
+			continue;
+
+		/* exactly one NULL means the groups differ */
+		if (newnull || oldnull)
+		{
+			matches = false;
+			break;
+		}
+
+		/* both values present - apply the type-specific equality function */
+		if (!DatumGetBool(FunctionCall2(&eqfn[keyno], newval, oldval)))
+		{
+			matches = false;
+			break;
+		}
+	}
+
+	MemoryContextSwitchTo(savedctx);
+
+	return matches;
+}
+
+/*
+ * Doubles the number of buckets (unless nbuckets_max was already reached) and
+ * rebuilds all the bucket chains by walking the densely packed entries in the
+ * chunks. We're shooting for (nentries <= nbuckets), i.e. 1 group per bucket
+ * on average. Tuples with the same hash mostly belong to the same group (and
+ * are merged into a single entry), so long chains should be rare.
+ */
+static void
+increase_nbuckets(AggState * aggstate)
+{
+
+	HashChunk chunk;
+	AggHashTable htab = aggstate->hashtable;
+
+	/* we've reached maximum number of buckets */
+	if (htab->nbuckets >= htab->nbuckets_max)
+		return;
+
+	htab->nbuckets *= 2;
+	htab->buckets
+		= (AggHashEntry*)repalloc(htab->buckets,
+								  htab->nbuckets * sizeof(AggHashEntry));
+	memset(htab->buckets, 0, htab->nbuckets * sizeof(AggHashEntry));
+
+	chunk = htab->chunks_hash;
+	while (chunk != NULL)
+	{
+
+		/* position within the buffer (up to chunk->used) */
+		size_t idx = 0;
+
+		/* chunk_alloc hands out MAXALIGNed pieces only */
+		Assert(chunk->used == MAXALIGN(chunk->used));
+
+		/* re-link all the entries stored in this chunk */
+		while (idx < chunk->used)
+		{
+
+			AggHashEntry entry = (AggHashEntry)(chunk->data + idx);
+
+			int bucketno = compute_bucket(aggstate, entry->hashvalue);
+
+			entry->next = htab->buckets[bucketno];
+			htab->buckets[bucketno] = entry;
+
+			/* skip the entry and its tuple, padded the same as in chunk_alloc */
+			idx += MAXALIGN(htab->entrysize + entry->tuple->t_len);
+
+		}
+
+		/* proceed to the next chunk */
+		chunk = chunk->next;
+
+	}
+
+}
+
+/*
+ * Hands out 'size' bytes (rounded up to MAXALIGN, so that entries placed one
+ * after another stay properly aligned) from the dense chunk storage.
+ */
+static
+char * chunk_alloc(AggHashTable htab, int size)
+{
+	/* keep every piece MAXALIGNed; the chunk walkers use the same rounding */
+	size = (int) MAXALIGN(size);
+
+	/* we need >8kB to get immediate free in aset.c */
+	Assert(HASH_CHUNK_SIZE > 8192);
+
+	/*
+	 * If the requested size is over 1/8 of chunk size, allocate a
+	 * separate chunk of exactly this size.
+	 *
+	 * XXX This may be problematic, because chunks like this may get
+	 *     below 8kB, and thus be considered 'regular' blocks by aset.c
+	 *     (and put on freelist, instead of freeing immediately).
+	 */
+	if (size > (HASH_CHUNK_SIZE/8))
+	{
+		/*
+		 * There's no point in making this 2^N size, because blocks over
+		 * 8kB are handled as a special case in aset.c (exact size).
+		 */
+		HashChunk newChunk
+			= (HashChunk)MemoryContextAllocZero(htab->htabctx,
+								offsetof(HashChunkData, data) + size);
+
+		newChunk->maxlen = size;
+		newChunk->used = 0;
+		newChunk->ntuples = 0;
+
+		/*
+		 * If there already is a chunk, add the new one after it, so we
+		 * can still use the space in the existing one.
+		 */
+		if (htab->chunks_hash != NULL)
+		{
+			newChunk->next = htab->chunks_hash->next;
+			htab->chunks_hash->next = newChunk;
+		}
+		else
+		{
+			newChunk->next = htab->chunks_hash;
+			htab->chunks_hash = newChunk;
+		}
+
+		newChunk->used += size;
+		newChunk->ntuples += 1;
+
+		return newChunk->data;
+	}
+
+	/*
+	 * Requested size is less than 1/8 of a chunk, so place it in the
+	 * current chunk if there is enough free space. If not, allocate
+	 * a new chunk and add it there.
+	 */
+	if ((htab->chunks_hash == NULL) ||
+		(htab->chunks_hash->maxlen - htab->chunks_hash->used) < size)
+	{
+		/* allocate new chunk and put it at the beginning of the list */
+		HashChunk newChunk
+			= (HashChunk)MemoryContextAllocZero(htab->htabctx,
+						offsetof(HashChunkData, data) + HASH_CHUNK_SIZE);
+
+		newChunk->maxlen = HASH_CHUNK_SIZE;
+		newChunk->used = 0;
+		newChunk->ntuples = 0;
+
+		newChunk->next = htab->chunks_hash;
+		htab->chunks_hash = newChunk;
+	}
+
+	/* OK, we have enough space in the chunk, let's add the tuple */
+	htab->chunks_hash->used += size;
+	htab->chunks_hash->ntuples += 1;
+
+	/* return pointer to the start of the just-reserved memory */
+	return htab->chunks_hash->data + (htab->chunks_hash->used - size);
+}
+
+/*
+ * Resets the hash table iterator, so that it points to the first entry
+ * in the first chunk of the list (or to nothing, if there are no chunks
+ * at all).
+ */
+static
+void IteratorReset(AggHashTable htab)
+{
+
+	htab->cur_chunk = htab->chunks_hash;
+
+	/* there may be no chunks at all (empty hash table) */
+	if (htab->cur_chunk != NULL)
+		htab->cur_entry = (AggHashEntry)htab->cur_chunk->data;
+	else
+		htab->cur_entry = NULL;
+
+}
+
+/*
+ * Returns the next hash table entry. Works by scanning the chunks, not
+ * by scanning the buckets etc. Returns NULL when there are no more
+ * entries.
+ */
+static
+AggHashEntry IteratorGetNext(AggHashTable htab)
+{
+
+	AggHashEntry	entry = NULL;
+	Size			len;
+
+	/* we've completed the last chunk (in the previous call) */
+	if (htab->cur_chunk == NULL)
+		return NULL;
+
+	/* we're not beyond the chunk data */
+	Assert((char*)htab->cur_entry < (htab->cur_chunk->data + htab->cur_chunk->used));
+
+	/*
+	 * We're still in the current chunk (otherwise the current chunk
+	 * would be set to NULL), so cur_entry points to a valid entry. The
+	 * next one starts after the entry and tuple, padded as in chunk_alloc.
+	 */
+	entry = htab->cur_entry;
+	len = MAXALIGN(htab->entrysize + entry->tuple->t_len);
+
+	/*
+	 * Proceed to the next entry and check if we've reached end of this
+	 * chunk. If yes, skip to the next one and set the current entry
+	 * accordingly (chunk=NULL means there's no valid entry).
+	 */
+	htab->cur_entry = (AggHashEntry)((char*)entry + len);
+
+	if ((char*)htab->cur_entry >= (htab->cur_chunk->data + htab->cur_chunk->used))
+	{
+		htab->cur_chunk = htab->cur_chunk->next;
+		if (htab->cur_chunk != NULL)
+			htab->cur_entry = (AggHashEntry)htab->cur_chunk->data;
+		else
+			htab->cur_entry = NULL;
+	}
+
+	return entry;
+
+}
+
+/*
+ * Resets the contents of the hash table - removes all the entries and
+ * tuples, but keeps the 'size' of the hash table (nbuckets). The iterator
+ * is reset too, as the chunks it may point into are freed here.
+ */
+static
+void reset_hash_table(AggHashTable htab) {
+	MemoryContext htabctx = htab->htabctx;
+	MemoryContext parent = htab->htabctx->parent;
+
+	htab->nentries = 0;
+	htab->chunks_hash = NULL;
+	htab->cur_chunk = NULL;
+	htab->cur_entry = NULL;
+
+	/*
+	 * XXX Resetting the context instead of recreating it would be nicer,
+	 *     but a reset often does not free a lot of memory, because the
+	 *     blocks are kept for future allocations.
+	 */
+	htab->htabctx = AllocSetContextCreateTracked(parent,
+											"HashAggHashTable",
+											ALLOCSET_DEFAULT_MINSIZE,
+											ALLOCSET_DEFAULT_INITSIZE,
+											ALLOCSET_DEFAULT_MAXSIZE,
+											true);
+
+	/* this also frees the old bucket array, so allocate a fresh (empty) one */
+	MemoryContextDelete(htabctx);
+	htab->buckets = (AggHashEntry*)MemoryContextAllocZero(htab->htabctx,
+								htab->nbuckets * sizeof(AggHashEntry));
+}
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b271f21..995389b 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1701,6 +1701,8 @@ typedef struct GroupState
 /* these structs are private in nodeAgg.c: */
 typedef struct AggStatePerAggData *AggStatePerAgg;
 typedef struct AggStatePerGroupData *AggStatePerGroup;
+typedef struct AggHashEntryData *AggHashEntry;
+typedef struct AggHashTableData *AggHashTable;
 
 typedef struct AggState
 {
@@ -1714,15 +1716,16 @@ typedef struct AggState
 	ExprContext *tmpcontext;	/* econtext for input expressions */
 	AggStatePerAgg curperagg;	/* identifies currently active aggregate */
 	bool		agg_done;		/* indicates completion of Agg scan */
+
 	/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
 	AggStatePerGroup pergroup;	/* per-Aggref-per-group working state */
 	HeapTuple	grp_firstTuple; /* copy of first tuple of current group */
+
 	/* these fields are used in AGG_HASHED mode: */
-	TupleHashTable hashtable;	/* hash table with one entry per group */
-	TupleTableSlot *hashslot;	/* slot for loading hash table */
 	List	   *hash_needed;	/* list of columns needed in hash table */
 	bool		table_filled;	/* hash table filled yet? */
-	TupleHashIterator hashiter; /* for iterating through hash table */
+	AggHashTable	hashtable;	/* instance of the simple hash table */
+
 } AggState;
 
 /* ----------------
