From c476028f989dd01d27ffa4b2c1a175c18f379d4a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 13 Nov 2025 12:50:44 -0500
Subject: [PATCH v5 3/5] Rethink eqjoinsel's handling of reversed joins.

Formerly, if we needed to deal with a "reversed" join where the
outer-side variable is on the right hand of the given operator,
we looked up the operator's commutator and applied that, so that
eqjoinsel_semi could always treat "sslot1" as the outer-side
variable of the semijoin.

This isn't great, because we ended up punting to a poor estimate
if no commutator is recorded.  It also doesn't play well with
later changes in this patch series.  Instead, let's handle the
case by swapping the left and right input values just before
we call the comparison operator.  While this theoretically adds
cycles to the inner comparison loop, with the coding proposed
here I don't see any real timing difference.  (But I only tested
it on x86_64.)
---
 src/backend/utils/adt/selfuncs.c | 44 +++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 590b3a0c078..53653d2d05b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -156,6 +156,7 @@ static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
 							  Form_pg_statistic stats1, Form_pg_statistic stats2,
 							  bool have_mcvs1, bool have_mcvs2);
 static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
+							 bool op_is_reversed,
 							 VariableStatData *vardata1, VariableStatData *vardata2,
 							 double nd1, double nd2,
 							 bool isdefault1, bool isdefault2,
@@ -164,6 +165,7 @@ static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
 							 bool have_mcvs1, bool have_mcvs2,
 							 RelOptInfo *inner_rel);
 static void eqjoinsel_find_matches(Oid opfuncoid, Oid collation,
+								   bool op_is_reversed,
 								   AttStatsSlot *sslot1, AttStatsSlot *sslot2,
 								   int nvalues1, int nvalues2,
 								   bool *hasmatch1, bool *hasmatch2,
@@ -2394,6 +2396,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
 
 			if (!join_is_reversed)
 				selec = eqjoinsel_semi(opfuncoid, collation,
+									   false,
 									   &vardata1, &vardata2,
 									   nd1, nd2,
 									   isdefault1, isdefault2,
@@ -2402,11 +2405,8 @@ eqjoinsel(PG_FUNCTION_ARGS)
 									   have_mcvs1, have_mcvs2,
 									   inner_rel);
 			else
-			{
-				Oid			commop = get_commutator(operator);
-				Oid			commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
-
-				selec = eqjoinsel_semi(commopfuncoid, collation,
+				selec = eqjoinsel_semi(opfuncoid, collation,
+									   true,
 									   &vardata2, &vardata1,
 									   nd2, nd1,
 									   isdefault2, isdefault1,
@@ -2414,7 +2414,6 @@ eqjoinsel(PG_FUNCTION_ARGS)
 									   stats2, stats1,
 									   have_mcvs2, have_mcvs1,
 									   inner_rel);
-			}
 
 			/*
 			 * We should never estimate the output of a semijoin to be more
@@ -2499,6 +2498,7 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
 		hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
 
 		eqjoinsel_find_matches(opfuncoid, collation,
+							   false,
 							   sslot1, sslot2,
 							   sslot1->nvalues, sslot2->nvalues,
 							   hasmatch1, hasmatch2,
@@ -2607,11 +2607,13 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
  * eqjoinsel_semi --- eqjoinsel for semi join
  *
  * (Also used for anti join, which we are supposed to estimate the same way.)
- * Caller has ensured that vardata1 is the LHS variable.
- * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
+ * Caller has ensured that vardata1 is the LHS variable; however, opfuncoid
+ * is for the original join operator, which might now need to have the inputs
+ * swapped in order to apply correctly.
  */
 static double
 eqjoinsel_semi(Oid opfuncoid, Oid collation,
+			   bool op_is_reversed,
 			   VariableStatData *vardata1, VariableStatData *vardata2,
 			   double nd1, double nd2,
 			   bool isdefault1, bool isdefault2,
@@ -2655,7 +2657,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		isdefault2 = false;
 	}
 
-	if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
+	if (have_mcvs1 && have_mcvs2)
 	{
 		/*
 		 * We have most-common-value lists for both relations.  Run through
@@ -2690,6 +2692,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
 		hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
 
 		eqjoinsel_find_matches(opfuncoid, collation,
+							   op_is_reversed,
 							   sslot1, sslot2,
 							   sslot1->nvalues, clamped_nvalues2,
 							   hasmatch1, hasmatch2,
@@ -2762,8 +2765,9 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
  * Identify matching MCVs for eqjoinsel_inner or eqjoinsel_semi.
  *
  * Inputs:
- *	opfuncoid: OID of equality function to use (might be cross-type)
+ *	opfuncoid: OID of equality function to use (might be reversed)
  *	collation: OID of collation to use
+ *	op_is_reversed: indicates that opfuncoid compares right type to left type
  *	sslot1, sslot2: MCV values for the lefthand and righthand inputs
  *	nvalues1, nvalues2: number of values to be considered (can be less than
  *		sslotN->nvalues, but not more)
@@ -2781,6 +2785,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
  */
 static void
 eqjoinsel_find_matches(Oid opfuncoid, Oid collation,
+					   bool op_is_reversed,
 					   AttStatsSlot *sslot1, AttStatsSlot *sslot2,
 					   int nvalues1, int nvalues2,
 					   bool *hasmatch1, bool *hasmatch2,
@@ -2809,9 +2814,24 @@ eqjoinsel_find_matches(Oid opfuncoid, Oid collation,
 	 * from eqjoinsel_inner.
 	 */
 	{
+		int			index1,
+					index2;
+
+		/* Set up to supply the values in the order the operator expects */
+		if (op_is_reversed)
+		{
+			index1 = 1;
+			index2 = 0;
+		}
+		else
+		{
+			index1 = 0;
+			index2 = 1;
+		}
+
 		for (int i = 0; i < nvalues1; i++)
 		{
-			fcinfo->args[0].value = sslot1->values[i];
+			fcinfo->args[index1].value = sslot1->values[i];
 
 			for (int j = 0; j < nvalues2; j++)
 			{
@@ -2819,7 +2839,7 @@ eqjoinsel_find_matches(Oid opfuncoid, Oid collation,
 
 				if (hasmatch2[j])
 					continue;
-				fcinfo->args[1].value = sslot2->values[j];
+				fcinfo->args[index2].value = sslot2->values[j];
 				fcinfo->isnull = false;
 				fresult = FunctionCallInvoke(fcinfo);
 				if (!fcinfo->isnull && DatumGetBool(fresult))
-- 
2.43.7

