From cc7a0b06d13fd47d6c78a74291b470e9c07e48af Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Fri, 31 Oct 2025 15:18:20 -0400
Subject: [PATCH v2 2/2] Change "long" numGroups fields to be Cardinality
 (i.e., double).

We've been nibbling away at removing uses of "long" for a long time,
since its width is platform-dependent.  Here's one more: change the
remaining "long" fields in Plan nodes to Cardinality, since the three
surviving examples all represent group-count estimates.  The upstream
planner code was converted to Cardinality some time ago; for example
the corresponding fields in Path nodes are type Cardinality, as are
the arguments of the make_foo_path functions.  Downstream in the
executor, it turns out that these all feed to the table-size argument
of BuildTupleHashTable.  Change that to "double" as well, and fix it
so that it safely clamps out-of-range values to the uint32 limit of
simplehash.h, as was not being done before.

Essentially, this is removing all the artificial datatype-dependent
limitations on these values from upstream processing, and applying
just one clamp at the moment where we're forced to do so by the
datatype choices of simplehash.h.

Also, remove BuildTupleHashTable's misguided attempt to enforce
work_mem/hash_mem_limit.  It doesn't have enough information
(particularly not the expected tuple width) to do that accurately,
and it has no real business second-guessing the caller's choice.
For all these plan types, it's really the planner's responsibility
to not choose a hashed implementation if the hashtable is expected
to exceed hash_mem_limit.  The previous patch improved the
accuracy of those estimates, and even if BuildTupleHashTable had
more information it should arrive at the same conclusions.

Reported-by: Jeff Janes <jeff.janes@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAMkU=1zia0JfW_QR8L5xA2vpa0oqVuiapm78h=WpNsHH13_9uw@mail.gmail.com
---
 src/backend/executor/execGrouping.c       | 36 ++++++++++++-----------
 src/backend/executor/nodeAgg.c            | 24 +++++++--------
 src/backend/executor/nodeRecursiveunion.c |  1 -
 src/backend/executor/nodeSetOp.c          |  1 -
 src/backend/executor/nodeSubplan.c        | 19 +++++-------
 src/backend/optimizer/path/costsize.c     | 26 ----------------
 src/backend/optimizer/plan/createplan.c   | 26 +++++-----------
 src/include/executor/executor.h           |  2 +-
 src/include/nodes/plannodes.h             |  6 ++--
 src/include/optimizer/optimizer.h         |  1 -
 src/include/optimizer/planmain.h          |  2 +-
 11 files changed, 50 insertions(+), 94 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index e1a3a813dd9..8b64a625ca5 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -14,6 +14,8 @@
  */
 #include "postgres.h"
 
+#include <math.h>
+
 #include "access/htup_details.h"
 #include "access/parallel.h"
 #include "common/hashfn.h"
@@ -144,7 +146,7 @@ execTuplesHashPrepare(int numCols,
  *	eqfuncoids: OIDs of equality comparison functions to use
  *	hashfunctions: FmgrInfos of datatype-specific hashing functions to use
  *	collations: collations to use in comparisons
- *	nbuckets: initial estimate of hashtable size
+ *	nelements: initial estimate of hashtable size
  *	additionalsize: size of data that may be stored along with the hash entry
  *	metacxt: memory context for long-lived data and the simplehash table
  *	tuplescxt: memory context in which to store the hashed tuples themselves
@@ -187,7 +189,7 @@ BuildTupleHashTable(PlanState *parent,
 					const Oid *eqfuncoids,
 					FmgrInfo *hashfunctions,
 					Oid *collations,
-					long nbuckets,
+					double nelements,
 					Size additionalsize,
 					MemoryContext metacxt,
 					MemoryContext tuplescxt,
@@ -195,12 +197,24 @@ BuildTupleHashTable(PlanState *parent,
 					bool use_variable_hash_iv)
 {
 	TupleHashTable hashtable;
-	Size		entrysize;
-	Size		hash_mem_limit;
+	uint32		nbuckets;
 	MemoryContext oldcontext;
 	uint32		hash_iv = 0;
 
-	Assert(nbuckets > 0);
+	/*
+	 * tuplehash_create requires a uint32 element count, so we had better
+	 * clamp the given nelements to fit in that.  As long as we have to do
+	 * that, we might as well protect against completely insane input like
+	 * zero or NaN.  But it is not our job here to enforce issues like staying
+	 * within hash_mem: the caller should have done that, and we don't have
+	 * enough info to second-guess.
+	 */
+	if (isnan(nelements) || nelements <= 0)
+		nbuckets = 1;
+	else if (nelements >= PG_UINT32_MAX)
+		nbuckets = PG_UINT32_MAX;
+	else
+		nbuckets = (uint32) nelements;
 
 	/* tuplescxt must be separate, else ResetTupleHashTable breaks things */
 	Assert(metacxt != tuplescxt);
@@ -208,18 +222,6 @@ BuildTupleHashTable(PlanState *parent,
 	/* ensure additionalsize is maxalign'ed */
 	additionalsize = MAXALIGN(additionalsize);
 
-	/*
-	 * Limit initial table size request to not more than hash_mem.
-	 *
-	 * XXX this calculation seems pretty misguided, as it counts only overhead
-	 * and not the tuples themselves.  But we have no knowledge of the
-	 * expected tuple width here.
-	 */
-	entrysize = sizeof(TupleHashEntryData) + additionalsize;
-	hash_mem_limit = get_hash_memory_limit() / entrysize;
-	if (nbuckets > hash_mem_limit)
-		nbuckets = hash_mem_limit;
-
 	oldcontext = MemoryContextSwitchTo(metacxt);
 
 	hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 759ffeed2e6..058171d5133 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -402,12 +402,12 @@ static void find_cols(AggState *aggstate, Bitmapset **aggregated,
 					  Bitmapset **unaggregated);
 static bool find_cols_walker(Node *node, FindColsContext *context);
 static void build_hash_tables(AggState *aggstate);
-static void build_hash_table(AggState *aggstate, int setno, long nbuckets);
+static void build_hash_table(AggState *aggstate, int setno, double nbuckets);
 static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
 										  bool nullcheck);
 static void hash_create_memory(AggState *aggstate);
-static long hash_choose_num_buckets(double hashentrysize,
-									long ngroups, Size memory);
+static double hash_choose_num_buckets(double hashentrysize,
+									  double ngroups, Size memory);
 static int	hash_choose_num_partitions(double input_groups,
 									   double hashentrysize,
 									   int used_bits,
@@ -1469,7 +1469,7 @@ build_hash_tables(AggState *aggstate)
 	for (setno = 0; setno < aggstate->num_hashes; ++setno)
 	{
 		AggStatePerHash perhash = &aggstate->perhash[setno];
-		long		nbuckets;
+		double		nbuckets;
 		Size		memory;
 
 		if (perhash->hashtable != NULL)
@@ -1478,8 +1478,6 @@ build_hash_tables(AggState *aggstate)
 			continue;
 		}
 
-		Assert(perhash->aggnode->numGroups > 0);
-
 		memory = aggstate->hash_mem_limit / aggstate->num_hashes;
 
 		/* choose reasonable number of buckets per hashtable */
@@ -1505,7 +1503,7 @@ build_hash_tables(AggState *aggstate)
  * Build a single hashtable for this grouping set.
  */
 static void
-build_hash_table(AggState *aggstate, int setno, long nbuckets)
+build_hash_table(AggState *aggstate, int setno, double nbuckets)
 {
 	AggStatePerHash perhash = &aggstate->perhash[setno];
 	MemoryContext metacxt = aggstate->hash_metacxt;
@@ -2053,11 +2051,11 @@ hash_create_memory(AggState *aggstate)
 /*
  * Choose a reasonable number of buckets for the initial hash table size.
  */
-static long
-hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
+static double
+hash_choose_num_buckets(double hashentrysize, double ngroups, Size memory)
 {
-	long		max_nbuckets;
-	long		nbuckets = ngroups;
+	double		max_nbuckets;
+	double		nbuckets = ngroups;
 
 	max_nbuckets = memory / hashentrysize;
 
@@ -2065,7 +2063,7 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
 	 * Underestimating is better than overestimating. Too many buckets crowd
 	 * out space for group keys and transition state values.
 	 */
-	max_nbuckets >>= 1;
+	max_nbuckets /= 2;
 
 	if (nbuckets > max_nbuckets)
 		nbuckets = max_nbuckets;
@@ -3686,7 +3684,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	if (use_hashing)
 	{
 		Plan	   *outerplan = outerPlan(node);
-		uint64		totalGroups = 0;
+		double		totalGroups = 0;
 
 		aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc,
 															&TTSOpsMinimalTuple);
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index ebb7919b49b..cd0ad51dcd2 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -35,7 +35,6 @@ build_hash_table(RecursiveUnionState *rustate)
 	TupleDesc	desc = ExecGetResultType(outerPlanState(rustate));
 
 	Assert(node->numCols > 0);
-	Assert(node->numGroups > 0);
 
 	/*
 	 * If both child plans deliver the same fixed tuple slot type, we can tell
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 5aabed18a09..9e0f9274fb1 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -88,7 +88,6 @@ build_hash_table(SetOpState *setopstate)
 	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));
 
 	Assert(node->strategy == SETOP_HASHED);
-	Assert(node->numGroups > 0);
 
 	/*
 	 * If both child plans deliver the same fixed tuple slot type, we can tell
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 96d900a7783..aa9fa482797 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -34,7 +34,6 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
-#include "optimizer/optimizer.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -481,7 +480,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	int			ncols = node->numCols;
 	ExprContext *innerecontext = node->innerecontext;
 	MemoryContext oldcontext;
-	long		nbuckets;
+	double		nentries;
 	TupleTableSlot *slot;
 
 	Assert(subplan->subLinkType == ANY_SUBLINK);
@@ -509,9 +508,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	node->havehashrows = false;
 	node->havenullrows = false;
 
-	nbuckets = clamp_cardinality_to_long(planstate->plan->plan_rows);
-	if (nbuckets < 1)
-		nbuckets = 1;
+	nentries = planstate->plan->plan_rows;
 
 	if (node->hashtable)
 		ResetTupleHashTable(node->hashtable);
@@ -524,7 +521,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 											  node->tab_eq_funcoids,
 											  node->tab_hash_funcs,
 											  node->tab_collations,
-											  nbuckets,
+											  nentries,
 											  0,	/* no additional data */
 											  node->planstate->state->es_query_cxt,
 											  node->tuplesContext,
@@ -534,12 +531,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	if (!subplan->unknownEqFalse)
 	{
 		if (ncols == 1)
-			nbuckets = 1;		/* there can only be one entry */
+			nentries = 1;		/* there can only be one entry */
 		else
 		{
-			nbuckets /= 16;
-			if (nbuckets < 1)
-				nbuckets = 1;
+			nentries /= 16;
+			if (nentries < 1)
+				nentries = 1;
 		}
 
 		if (node->hashnulls)
@@ -553,7 +550,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 												  node->tab_eq_funcoids,
 												  node->tab_hash_funcs,
 												  node->tab_collations,
-												  nbuckets,
+												  nentries,
 												  0,	/* no additional data */
 												  node->planstate->state->es_query_cxt,
 												  node->tuplesContext,
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 94077e6a006..8335cf5b5c5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -257,32 +257,6 @@ clamp_width_est(int64 tuple_width)
 	return (int32) tuple_width;
 }
 
-/*
- * clamp_cardinality_to_long
- *		Cast a Cardinality value to a sane long value.
- */
-long
-clamp_cardinality_to_long(Cardinality x)
-{
-	/*
-	 * Just for paranoia's sake, ensure we do something sane with negative or
-	 * NaN values.
-	 */
-	if (isnan(x))
-		return LONG_MAX;
-	if (x <= 0)
-		return 0;
-
-	/*
-	 * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
-	 * double.  Casting it to double and back may well result in overflow due
-	 * to rounding, so avoid doing that.  We trust that any double value that
-	 * compares strictly less than "(double) LONG_MAX" will cast to a
-	 * representable "long" value.
-	 */
-	return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
-}
-
 
 /*
  * cost_seqscan
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 63fe6637155..8af091ba647 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -226,7 +226,7 @@ static RecursiveUnion *make_recursive_union(List *tlist,
 											Plan *righttree,
 											int wtParam,
 											List *distinctList,
-											long numGroups);
+											Cardinality numGroups);
 static BitmapAnd *make_bitmap_and(List *bitmapplans);
 static BitmapOr *make_bitmap_or(List *bitmapplans);
 static NestLoop *make_nestloop(List *tlist,
@@ -301,7 +301,7 @@ static Gather *make_gather(List *qptlist, List *qpqual,
 						   int nworkers, int rescan_param, bool single_copy, Plan *subplan);
 static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
 						 List *tlist, Plan *lefttree, Plan *righttree,
-						 List *groupList, long numGroups);
+						 List *groupList, Cardinality numGroups);
 static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
 static Result *make_gating_result(List *tlist, Node *resconstantqual,
 								  Plan *subplan);
@@ -2564,7 +2564,6 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
 	List	   *tlist = build_path_tlist(root, &best_path->path);
 	Plan	   *leftplan;
 	Plan	   *rightplan;
-	long		numGroups;
 
 	/*
 	 * SetOp doesn't project, so tlist requirements pass through; moreover we
@@ -2575,16 +2574,13 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
 	rightplan = create_plan_recurse(root, best_path->rightpath,
 									flags | CP_LABEL_TLIST);
 
-	/* Convert numGroups to long int --- but 'ware overflow! */
-	numGroups = clamp_cardinality_to_long(best_path->numGroups);
-
 	plan = make_setop(best_path->cmd,
 					  best_path->strategy,
 					  tlist,
 					  leftplan,
 					  rightplan,
 					  best_path->groupList,
-					  numGroups);
+					  best_path->numGroups);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -2604,7 +2600,6 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path)
 	Plan	   *leftplan;
 	Plan	   *rightplan;
 	List	   *tlist;
-	long		numGroups;
 
 	/* Need both children to produce same tlist, so force it */
 	leftplan = create_plan_recurse(root, best_path->leftpath, CP_EXACT_TLIST);
@@ -2612,15 +2607,12 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path)
 
 	tlist = build_path_tlist(root, &best_path->path);
 
-	/* Convert numGroups to long int --- but 'ware overflow! */
-	numGroups = clamp_cardinality_to_long(best_path->numGroups);
-
 	plan = make_recursive_union(tlist,
 								leftplan,
 								rightplan,
 								best_path->wtParam,
 								best_path->distinctList,
-								numGroups);
+								best_path->numGroups);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -5845,7 +5837,7 @@ make_recursive_union(List *tlist,
 					 Plan *righttree,
 					 int wtParam,
 					 List *distinctList,
-					 long numGroups)
+					 Cardinality numGroups)
 {
 	RecursiveUnion *node = makeNode(RecursiveUnion);
 	Plan	   *plan = &node->plan;
@@ -6582,15 +6574,11 @@ Agg *
 make_agg(List *tlist, List *qual,
 		 AggStrategy aggstrategy, AggSplit aggsplit,
 		 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-		 List *groupingSets, List *chain, double dNumGroups,
+		 List *groupingSets, List *chain, Cardinality numGroups,
 		 Size transitionSpace, Plan *lefttree)
 {
 	Agg		   *node = makeNode(Agg);
 	Plan	   *plan = &node->plan;
-	long		numGroups;
-
-	/* Reduce to long, but 'ware overflow! */
-	numGroups = clamp_cardinality_to_long(dNumGroups);
 
 	node->aggstrategy = aggstrategy;
 	node->aggsplit = aggsplit;
@@ -6822,7 +6810,7 @@ make_gather(List *qptlist,
 static SetOp *
 make_setop(SetOpCmd cmd, SetOpStrategy strategy,
 		   List *tlist, Plan *lefttree, Plan *righttree,
-		   List *groupList, long numGroups)
+		   List *groupList, Cardinality numGroups)
 {
 	SetOp	   *node = makeNode(SetOp);
 	Plan	   *plan = &node->plan;
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 086f52cff3d..fa2b657fb2f 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -138,7 +138,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
 										  const Oid *eqfuncoids,
 										  FmgrInfo *hashfunctions,
 										  Oid *collations,
-										  long nbuckets,
+										  double nelements,
 										  Size additionalsize,
 										  MemoryContext metacxt,
 										  MemoryContext tuplescxt,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 7cdd2b51c94..c4393a94321 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -475,7 +475,7 @@ typedef struct RecursiveUnion
 	Oid		   *dupCollations pg_node_attr(array_size(numCols));
 
 	/* estimated number of groups in input */
-	long		numGroups;
+	Cardinality numGroups;
 } RecursiveUnion;
 
 /* ----------------
@@ -1206,7 +1206,7 @@ typedef struct Agg
 	Oid		   *grpCollations pg_node_attr(array_size(numCols));
 
 	/* estimated number of groups in input */
-	long		numGroups;
+	Cardinality numGroups;
 
 	/* for pass-by-ref transition data */
 	uint64		transitionSpace;
@@ -1446,7 +1446,7 @@ typedef struct SetOp
 	bool	   *cmpNullsFirst pg_node_attr(array_size(numCols));
 
 	/* estimated number of groups in left input */
-	long		numGroups;
+	Cardinality numGroups;
 } SetOp;
 
 /* ----------------
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index a34113903c0..d0aa8ab0c1c 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -82,7 +82,6 @@ extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);
 extern int32 clamp_width_est(int64 tuple_width);
-extern long clamp_cardinality_to_long(Cardinality x);
 
 /* in path/indxpath.c: */
 
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 09b48b26f8f..00addf15992 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -55,7 +55,7 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
 extern Agg *make_agg(List *tlist, List *qual,
 					 AggStrategy aggstrategy, AggSplit aggsplit,
 					 int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
-					 List *groupingSets, List *chain, double dNumGroups,
+					 List *groupingSets, List *chain, Cardinality numGroups,
 					 Size transitionSpace, Plan *lefttree);
 extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
 						 LimitOption limitOption, int uniqNumCols,
-- 
2.43.7

