From 20bada97f9f36d5cb0d6f211f57c084b8cdb2d79 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 6 Dec 2025 13:13:14 -0500
Subject: [PATCH v3] Handle constant inputs to corr() and related aggregates
 more precisely.

The SQL standard says that corr() and friends should return NULL in
the mathematically-undefined case where all the inputs in one of
the columns have the same value.  We were checking that by seeing
if the sums Sxx and Syy were zero, but that approach is very
vulnerable to roundoff error: if a sum is close to zero but not
exactly that, we'd come out with a pretty silly non-NULL result.

Instead, directly track whether the inputs are all equal by
remembering the common value in each column.  Once we detect
that a new input is different from before, represent that by
storing NaN for the common value.  (An objection to this scheme
is that if the inputs are all NaN, we will consider that they
were not all equal.  But under IEEE float arithmetic rules,
one NaN is never equal to another, so this behavior is arguably
correct.  Moreover it matches what we did before in such cases.)
Then, leave the sums at their exact value of zero for as long
as we haven't detected different input values.

This solution requires the aggregate transition state to contain
8 float values not 6, which is not problematic, and it seems to add
less than 1% to the aggregates' runtime, which seems acceptable.

While we're here, improve corr()'s final function to cope with
overflow/underflow in the final calculation, and to clamp its
result to [-1, 1] in case of roundoff error.

Although this is arguably a bug fix, it requires a catversion bump
due to the change in aggregates' initial states, so it can't be
back-patched.

Patch by me, but many of the ideas are due to Dean Rasheed,
who also did a deal of testing.

Bug: #19340
Reported-by: Oleg Ivanov <o15611@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Co-authored-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Discussion: https://postgr.es/m/19340-6fb9f6637f562092@postgresql.org
---
 src/backend/utils/adt/float.c            | 165 +++++++++++++++++++----
 src/include/catalog/pg_aggregate.dat     |  22 +--
 src/test/regress/expected/aggregates.out |  94 ++++++++++---
 src/test/regress/sql/aggregates.sql      |  32 ++++-
 4 files changed, 247 insertions(+), 66 deletions(-)

diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 7b97d2be6ca..3a60dc73a94 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -3319,9 +3319,21 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
  * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
  * reduce rounding errors in the aggregate final functions.
  *
- * The transition datatype for all these aggregates is a 6-element array of
+ * The transition datatype for all these aggregates is an 8-element array of
  * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
- * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)), commonX, and commonY
+ * in that order.
+ *
+ * commonX is defined as the common X value if all the X values were the same,
+ * else NaN; likewise for commonY.  This is useful for deciding whether corr()
+ * and related functions should return NULL.  This representation cannot
+ * distinguish the-values-were-all-NaN from the-values-were-not-all-the-same,
+ * but that's okay because for this purpose we use the IEEE float arithmetic
+ * principle that two NaNs are never equal.  The SQL standard doesn't mention
+ * NaNs, but it says that NULL is to be returned when N*sum(X*X) equals
+ * sum(X)*sum(X) (etc), and that shouldn't be considered true for NaNs.
+ * Testing that as written in the spec would be highly subject to roundoff
+ * error, so instead we directly track whether all the inputs are equal.
  *
  * Note that Y is the first argument to all these aggregates!
  *
@@ -3345,17 +3357,21 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 				Sy,
 				Syy,
 				Sxy,
+				commonX,
+				commonY,
 				tmpX,
 				tmpY,
 				scale;
 
-	transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_accum", 8);
 	N = transvalues[0];
 	Sx = transvalues[1];
 	Sxx = transvalues[2];
 	Sy = transvalues[3];
 	Syy = transvalues[4];
 	Sxy = transvalues[5];
+	commonX = transvalues[6];
+	commonY = transvalues[7];
 
 	/*
 	 * Use the Youngs-Cramer algorithm to incorporate the new values into the
@@ -3366,12 +3382,33 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 	Sy += newvalY;
 	if (transvalues[0] > 0.0)
 	{
+		/*
+		 * Check to see if we have seen distinct inputs.  We can use a test
+		 * that's a bit cheaper than float8_ne() because if commonX is already
+		 * NaN, it does not matter whether the != test returns true or not.
+		 */
+		if (newvalX != commonX || isnan(newvalX))
+			commonX = get_float8_nan();
+		if (newvalY != commonY || isnan(newvalY))
+			commonY = get_float8_nan();
+
 		tmpX = newvalX * N - Sx;
 		tmpY = newvalY * N - Sy;
 		scale = 1.0 / (N * transvalues[0]);
-		Sxx += tmpX * tmpX * scale;
-		Syy += tmpY * tmpY * scale;
-		Sxy += tmpX * tmpY * scale;
+
+		/*
+		 * If we have not seen distinct inputs, then Sxx, Syy, and/or Sxy
+		 * should remain zero (since Sx's exact value would be N * commonX,
+		 * etc).  Updating them would just create the possibility of injecting
+		 * roundoff error, and we need exact zero results so that the final
+		 * functions will return NULL in the right cases.
+		 */
+		if (isnan(commonX))
+			Sxx += tmpX * tmpX * scale;
+		if (isnan(commonY))
+			Syy += tmpY * tmpY * scale;
+		if (isnan(commonX) && isnan(commonY))
+			Sxy += tmpX * tmpY * scale;
 
 		/*
 		 * Overflow check.  We only report an overflow error when finite
@@ -3410,6 +3447,9 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 			Sxx = Sxy = get_float8_nan();
 		if (isnan(newvalY) || isinf(newvalY))
 			Syy = Sxy = get_float8_nan();
+
+		commonX = newvalX;
+		commonY = newvalY;
 	}
 
 	/*
@@ -3425,12 +3465,14 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 		transvalues[3] = Sy;
 		transvalues[4] = Syy;
 		transvalues[5] = Sxy;
+		transvalues[6] = commonX;
+		transvalues[7] = commonY;
 
 		PG_RETURN_ARRAYTYPE_P(transarray);
 	}
 	else
 	{
-		Datum		transdatums[6];
+		Datum		transdatums[8];
 		ArrayType  *result;
 
 		transdatums[0] = Float8GetDatumFast(N);
@@ -3439,8 +3481,10 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 		transdatums[3] = Float8GetDatumFast(Sy);
 		transdatums[4] = Float8GetDatumFast(Syy);
 		transdatums[5] = Float8GetDatumFast(Sxy);
+		transdatums[6] = Float8GetDatumFast(commonX);
+		transdatums[7] = Float8GetDatumFast(commonY);
 
-		result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+		result = construct_array_builtin(transdatums, 8, FLOAT8OID);
 
 		PG_RETURN_ARRAYTYPE_P(result);
 	}
@@ -3449,7 +3493,7 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 /*
  * float8_regr_combine
  *
- * An aggregate combine function used to combine two 6 fields
+ * An aggregate combine function used to combine two 8-fields
  * aggregate transition data into a single transition data.
  * This function is used only in two stage aggregation and
  * shouldn't be called outside aggregate context.
@@ -3467,12 +3511,16 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 				Sy1,
 				Syy1,
 				Sxy1,
+				Cx1,
+				Cy1,
 				N2,
 				Sx2,
 				Sxx2,
 				Sy2,
 				Syy2,
 				Sxy2,
+				Cx2,
+				Cy2,
 				tmp1,
 				tmp2,
 				N,
@@ -3480,10 +3528,12 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 				Sxx,
 				Sy,
 				Syy,
-				Sxy;
+				Sxy,
+				Cx,
+				Cy;
 
-	transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
-	transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
+	transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 8);
+	transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 8);
 
 	N1 = transvalues1[0];
 	Sx1 = transvalues1[1];
@@ -3491,6 +3541,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 	Sy1 = transvalues1[3];
 	Syy1 = transvalues1[4];
 	Sxy1 = transvalues1[5];
+	Cx1 = transvalues1[6];
+	Cy1 = transvalues1[7];
 
 	N2 = transvalues2[0];
 	Sx2 = transvalues2[1];
@@ -3498,6 +3550,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 	Sy2 = transvalues2[3];
 	Syy2 = transvalues2[4];
 	Sxy2 = transvalues2[5];
+	Cx2 = transvalues2[6];
+	Cy2 = transvalues2[7];
 
 	/*--------------------
 	 * The transition values combine using a generalization of the
@@ -3523,6 +3577,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 		Sy = Sy2;
 		Syy = Syy2;
 		Sxy = Sxy2;
+		Cx = Cx2;
+		Cy = Cy2;
 	}
 	else if (N2 == 0.0)
 	{
@@ -3532,6 +3588,8 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 		Sy = Sy1;
 		Syy = Syy1;
 		Sxy = Sxy1;
+		Cx = Cx1;
+		Cy = Cy1;
 	}
 	else
 	{
@@ -3549,6 +3607,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 		Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
 		if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2))
 			float_overflow_error();
+		if (float8_eq(Cx1, Cx2))
+			Cx = Cx1;
+		else
+			Cx = get_float8_nan();
+		if (float8_eq(Cy1, Cy2))
+			Cy = Cy1;
+		else
+			Cy = get_float8_nan();
 	}
 
 	/*
@@ -3564,12 +3630,14 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 		transvalues1[3] = Sy;
 		transvalues1[4] = Syy;
 		transvalues1[5] = Sxy;
+		transvalues1[6] = Cx;
+		transvalues1[7] = Cy;
 
 		PG_RETURN_ARRAYTYPE_P(transarray1);
 	}
 	else
 	{
-		Datum		transdatums[6];
+		Datum		transdatums[8];
 		ArrayType  *result;
 
 		transdatums[0] = Float8GetDatumFast(N);
@@ -3578,8 +3646,10 @@ float8_regr_combine(PG_FUNCTION_ARGS)
 		transdatums[3] = Float8GetDatumFast(Sy);
 		transdatums[4] = Float8GetDatumFast(Syy);
 		transdatums[5] = Float8GetDatumFast(Sxy);
+		transdatums[6] = Float8GetDatumFast(Cx);
+		transdatums[7] = Float8GetDatumFast(Cy);
 
-		result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+		result = construct_array_builtin(transdatums, 8, FLOAT8OID);
 
 		PG_RETURN_ARRAYTYPE_P(result);
 	}
@@ -3594,7 +3664,7 @@ float8_regr_sxx(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxx;
 
-	transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_sxx", 8);
 	N = transvalues[0];
 	Sxx = transvalues[2];
 
@@ -3615,7 +3685,7 @@ float8_regr_syy(PG_FUNCTION_ARGS)
 	float8		N,
 				Syy;
 
-	transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_syy", 8);
 	N = transvalues[0];
 	Syy = transvalues[4];
 
@@ -3636,7 +3706,7 @@ float8_regr_sxy(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_sxy", 8);
 	N = transvalues[0];
 	Sxy = transvalues[5];
 
@@ -3655,16 +3725,22 @@ float8_regr_avgx(PG_FUNCTION_ARGS)
 	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
 	float8	   *transvalues;
 	float8		N,
-				Sx;
+				Sx,
+				commonX;
 
-	transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_avgx", 8);
 	N = transvalues[0];
 	Sx = transvalues[1];
+	commonX = transvalues[6];
 
 	/* if N is 0 we should return NULL */
 	if (N < 1.0)
 		PG_RETURN_NULL();
 
+	/* if all inputs were the same just return that, avoiding roundoff error */
+	if (!isnan(commonX))
+		PG_RETURN_FLOAT8(commonX);
+
 	PG_RETURN_FLOAT8(Sx / N);
 }
 
@@ -3674,16 +3750,22 @@ float8_regr_avgy(PG_FUNCTION_ARGS)
 	ArrayType  *transarray = PG_GETARG_ARRAYTYPE_P(0);
 	float8	   *transvalues;
 	float8		N,
-				Sy;
+				Sy,
+				commonY;
 
-	transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_avgy", 8);
 	N = transvalues[0];
 	Sy = transvalues[3];
+	commonY = transvalues[7];
 
 	/* if N is 0 we should return NULL */
 	if (N < 1.0)
 		PG_RETURN_NULL();
 
+	/* if all inputs were the same just return that, avoiding roundoff error */
+	if (!isnan(commonY))
+		PG_RETURN_FLOAT8(commonY);
+
 	PG_RETURN_FLOAT8(Sy / N);
 }
 
@@ -3695,7 +3777,7 @@ float8_covar_pop(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+	transvalues = check_float8_array(transarray, "float8_covar_pop", 8);
 	N = transvalues[0];
 	Sxy = transvalues[5];
 
@@ -3714,7 +3796,7 @@ float8_covar_samp(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+	transvalues = check_float8_array(transarray, "float8_covar_samp", 8);
 	N = transvalues[0];
 	Sxy = transvalues[5];
 
@@ -3733,9 +3815,12 @@ float8_corr(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxx,
 				Syy,
-				Sxy;
+				Sxy,
+				product,
+				sqrtproduct,
+				result;
 
-	transvalues = check_float8_array(transarray, "float8_corr", 6);
+	transvalues = check_float8_array(transarray, "float8_corr", 8);
 	N = transvalues[0];
 	Sxx = transvalues[2];
 	Syy = transvalues[4];
@@ -3751,7 +3836,29 @@ float8_corr(PG_FUNCTION_ARGS)
 	if (Sxx == 0 || Syy == 0)
 		PG_RETURN_NULL();
 
-	PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
+	/*
+	 * The product Sxx * Syy might underflow or overflow.  If so, we can
+	 * recover by computing sqrt(Sxx) * sqrt(Syy) instead of sqrt(Sxx * Syy).
+	 * However, the double sqrt() calculation is a bit slower and less
+	 * accurate, so don't do it if we don't have to.
+	 */
+	product = Sxx * Syy;
+	if (product == 0 || isinf(product))
+		sqrtproduct = sqrt(Sxx) * sqrt(Syy);
+	else
+		sqrtproduct = sqrt(product);
+	result = Sxy / sqrtproduct;
+
+	/*
+	 * Despite all these precautions, this formula can yield results outside
+	 * [-1, 1] due to roundoff error.  Clamp it to the expected range.
+	 */
+	if (result < -1)
+		result = -1;
+	else if (result > 1)
+		result = 1;
+
+	PG_RETURN_FLOAT8(result);
 }
 
 Datum
@@ -3764,7 +3871,7 @@ float8_regr_r2(PG_FUNCTION_ARGS)
 				Syy,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_r2", 8);
 	N = transvalues[0];
 	Sxx = transvalues[2];
 	Syy = transvalues[4];
@@ -3796,7 +3903,7 @@ float8_regr_slope(PG_FUNCTION_ARGS)
 				Sxx,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_slope", 8);
 	N = transvalues[0];
 	Sxx = transvalues[2];
 	Sxy = transvalues[5];
@@ -3825,7 +3932,7 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
 				Sy,
 				Sxy;
 
-	transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
 	N = transvalues[0];
 	Sx = transvalues[1];
 	Sxx = transvalues[2];
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 870769e8f14..f22ccfbf49f 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -475,37 +475,37 @@
   aggcombinefn => 'int8pl', aggtranstype => 'int8', agginitval => '0' },
 { aggfnoid => 'regr_sxx', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_sxx', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_syy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_syy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_sxy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_sxy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_avgx', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_avgx', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_avgy', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_avgy', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_r2', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_r2', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_slope', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_slope', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'regr_intercept', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_regr_intercept', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'covar_pop', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_covar_pop', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'covar_samp', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_covar_samp', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 { aggfnoid => 'corr', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_corr', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 
 # boolean-and and boolean-or
 { aggfnoid => 'bool_and', aggtransfn => 'booland_statefunc',
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index be0e1573183..cae8e7bca31 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -515,6 +515,62 @@ SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
        NaN |           
 (1 row)
 
+-- check some cases that formerly had poor roundoff-error behavior
+SELECT corr(0.09, g), regr_r2(0.09, g)
+  FROM generate_series(1, 30) g;
+ corr | regr_r2 
+------+---------
+      |       1
+(1 row)
+
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+  FROM generate_series(1, 30) g;
+ corr | regr_r2 | regr_slope | regr_intercept 
+------+---------+------------+----------------
+      |         |            |               
+(1 row)
+
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+  FROM generate_series(1, 3) g;
+ corr 
+------
+     
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 3) g;
+ corr 
+------
+    1
+(1 row)
+
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 30) g;
+ corr 
+------
+    1
+(1 row)
+
+-- these examples pose definitional questions for NaN inputs,
+-- which we resolve by saying that an all-NaN input column is not all equal
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+ corr 
+------
+  NaN
+(1 row)
+
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+ corr 
+------
+     
+(1 row)
+
+SELECT corr('NaN', 'NaN') FROM generate_series(1, 30) g;
+ corr 
+------
+  NaN
+(1 row)
+
 -- test accum and combine functions directly
 CREATE TABLE regr_test (x float8, y float8);
 INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -538,10 +594,10 @@ SELECT float8_accum('{4,140,2900}'::float8[], 100);
  {5,240,6280}
 (1 row)
 
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
-      float8_regr_accum       
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
+           float8_regr_accum           
+---------------------------------------
+ {5,240,2900,1490,95080,15050,100,NaN}
 (1 row)
 
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -576,25 +632,25 @@ SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
  {5,240,6280}
 (1 row)
 
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{0,0,0,0,0,0}'::float8[]);
-    float8_regr_combine    
----------------------------
- {3,60,200,750,20000,2000}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+                           '{0,0,0,0,0,0,0,0}'::float8[]);
+       float8_regr_combine       
+---------------------------------
+ {3,60,200,750,20000,2000,1,NaN}
 (1 row)
 
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-     float8_regr_combine     
------------------------------
- {2,180,200,740,57800,-3400}
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+        float8_regr_combine        
+-----------------------------------
+ {2,180,200,740,57800,-3400,NaN,1}
 (1 row)
 
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-     float8_regr_combine      
-------------------------------
- {5,240,6280,1490,95080,8680}
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+                           '{2,180,200,740,57800,-3400,7,9}'::float8[]);
+        float8_regr_combine         
+------------------------------------
+ {5,240,6280,1490,95080,8680,7,NaN}
 (1 row)
 
 DROP TABLE regr_test;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 77ca6ffa3a9..850f5a5787f 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -140,6 +140,24 @@ SELECT covar_pop(1::float8,2::float8), covar_samp(3::float8,4::float8);
 SELECT covar_pop(1::float8,'inf'::float8), covar_samp(3::float8,'inf'::float8);
 SELECT covar_pop(1::float8,'nan'::float8), covar_samp(3::float8,'nan'::float8);
 
+-- check some cases that formerly had poor roundoff-error behavior
+SELECT corr(0.09, g), regr_r2(0.09, g)
+  FROM generate_series(1, 30) g;
+SELECT corr(g, 0.09), regr_r2(g, 0.09), regr_slope(g, 0.09), regr_intercept(g, 0.09)
+  FROM generate_series(1, 30) g;
+SELECT corr(1.3 + g * 1e-16, 1.3 + g * 1e-16)
+  FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 3) g;
+SELECT corr(1e-100 + g * 1e-105, 1e-100 + g * 1e-105)
+  FROM generate_series(1, 30) g;
+
+-- these examples pose definitional questions for NaN inputs,
+-- which we resolve by saying that an all-NaN input column is not all equal
+SELECT corr(g, 'NaN') FROM generate_series(1, 30) g;
+SELECT corr(0.1, 'NaN') FROM generate_series(1, 30) g;
+SELECT corr('NaN', 'NaN') FROM generate_series(1, 30) g;
+
 -- test accum and combine functions directly
 CREATE TABLE regr_test (x float8, y float8);
 INSERT INTO regr_test VALUES (10,150),(20,250),(30,350),(80,540),(100,200);
@@ -148,7 +166,7 @@ FROM regr_test WHERE x IN (10,20,30,80);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
 FROM regr_test;
 SELECT float8_accum('{4,140,2900}'::float8[], 100);
-SELECT float8_regr_accum('{4,140,2900,1290,83075,15050}'::float8[], 200, 100);
+SELECT float8_regr_accum('{4,140,2900,1290,83075,15050,100,0}'::float8[], 200, 100);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
 FROM regr_test WHERE x IN (10,20,30);
 SELECT count(*), sum(x), regr_sxx(y,x), sum(y),regr_syy(y,x), regr_sxy(y,x)
@@ -156,12 +174,12 @@ FROM regr_test WHERE x IN (80,100);
 SELECT float8_combine('{3,60,200}'::float8[], '{0,0,0}'::float8[]);
 SELECT float8_combine('{0,0,0}'::float8[], '{2,180,200}'::float8[]);
 SELECT float8_combine('{3,60,200}'::float8[], '{2,180,200}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{0,0,0,0,0,0}'::float8[]);
-SELECT float8_regr_combine('{0,0,0,0,0,0}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
-SELECT float8_regr_combine('{3,60,200,750,20000,2000}'::float8[],
-                           '{2,180,200,740,57800,-3400}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,1,NaN}'::float8[],
+                           '{0,0,0,0,0,0,0,0}'::float8[]);
+SELECT float8_regr_combine('{0,0,0,0,0,0,0,0}'::float8[],
+                           '{2,180,200,740,57800,-3400,NaN,1}'::float8[]);
+SELECT float8_regr_combine('{3,60,200,750,20000,2000,7,8}'::float8[],
+                           '{2,180,200,740,57800,-3400,7,9}'::float8[]);
 DROP TABLE regr_test;
 
 -- test count, distinct
-- 
2.43.7

