From: Noah Misch <noah@leadboat.com>

Sort dump objects independent of OIDs, for the 7 holdout object types.

pg_dump sorts objects by their logical names, e.g. (nspname, relname,
tgname), before dependency-driven reordering.  That removes one source
of logically-identical databases differing in their schema-only dumps.
In other words, it helps with schema diffing.  The logical name sort
ignored essential sort keys for constraints, operators, PUBLICATION
... FOR TABLE, PUBLICATION ... FOR TABLES IN SCHEMA, operator classes,
and operator families.  pg_dump's sort then depended on object OID,
yielding spurious schema diffs.  After this change, OIDs affect dump
order only in the event of catalog corruption.  While pg_dump also
wrongly ignored pg_collation.collencoding, CREATE COLLATION restrictions
have been keeping that imperceptible in practical use.

Use techniques like we use for object types already having full sort key
coverage.  Where the pertinent queries weren't fetching the ignored sort
keys, this adds columns to those queries and stores those keys in memory
for the long term.

The ignorance of sort keys became more problematic when commit
172259afb563d35001410dc6daad78b250924038 added a schema diff test
sensitive to it.  Hence, back-patch as far as that commit.

Reviewed-by: FIXME
Discussion: https://postgr.es/m/FIXME
Backpatch-through: 18

diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index aa1589e..a1976fa 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -17,6 +17,7 @@
 
 #include <ctype.h>
 
+#include "catalog/pg_am_d.h"
 #include "catalog/pg_class_d.h"
 #include "catalog/pg_collation_d.h"
 #include "catalog/pg_extension_d.h"
@@ -945,6 +946,30 @@ findOprByOid(Oid oid)
 }
 
 /*
+ * findAccessMethodByOid
+ *	  finds the DumpableObject for the access method with the given oid
+ *	  returns NULL if not found
+ *
+ * NOTE(review): accessMethodNameCompare() in pg_dump_sort.c asserts when
+ * this returns NULL for an opclass's or opfamily's access method.  That is
+ * only safe if getAccessMethods() registers every pg_am row for all server
+ * versions pg_dump supports -- confirm; it may skip servers that predate
+ * user-defined access methods.
+ */
+AccessMethodInfo *
+findAccessMethodByOid(Oid oid)
+{
+	CatalogId	catId;
+	DumpableObject *dobj;
+
+	catId.tableoid = AccessMethodRelationId;	/* from catalog/pg_am_d.h */
+	catId.oid = oid;
+	dobj = findObjectByCatalogId(catId);
+	Assert(dobj == NULL || dobj->objType == DO_ACCESS_METHOD);
+	return (AccessMethodInfo *) dobj;
+}
+
+/*
  * findCollationByOid
  *	  finds the DumpableObject for the collation with the given oid
  *	  returns NULL if not found
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 1937997..cf75e24 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -6181,6 +6181,8 @@ getOperators(Archive *fout)
 	int			i_oprnamespace;
 	int			i_oprowner;
 	int			i_oprkind;
+	int			i_oprleft;
+	int			i_oprright;
 	int			i_oprcode;
 
 	/*
@@ -6192,6 +6194,8 @@ getOperators(Archive *fout)
 						 "oprnamespace, "
 						 "oprowner, "
 						 "oprkind, "
+						 "oprleft, "
+						 "oprright, "
 						 "oprcode::oid AS oprcode "
 						 "FROM pg_operator");
 
@@ -6207,6 +6211,8 @@ getOperators(Archive *fout)
 	i_oprnamespace = PQfnumber(res, "oprnamespace");
 	i_oprowner = PQfnumber(res, "oprowner");
 	i_oprkind = PQfnumber(res, "oprkind");
+	i_oprleft = PQfnumber(res, "oprleft");
+	i_oprright = PQfnumber(res, "oprright");
 	i_oprcode = PQfnumber(res, "oprcode");
 
 	for (i = 0; i < ntups; i++)
@@ -6220,6 +6226,8 @@ getOperators(Archive *fout)
 			findNamespace(atooid(PQgetvalue(res, i, i_oprnamespace)));
 		oprinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_oprowner));
 		oprinfo[i].oprkind = (PQgetvalue(res, i, i_oprkind))[0];
+		oprinfo[i].oprleft = atooid(PQgetvalue(res, i, i_oprleft));
+		oprinfo[i].oprright = atooid(PQgetvalue(res, i, i_oprright));
 		oprinfo[i].oprcode = atooid(PQgetvalue(res, i, i_oprcode));
 
 		/* Decide whether we want to dump it */
@@ -6248,6 +6256,7 @@ getCollations(Archive *fout)
 	int			i_collname;
 	int			i_collnamespace;
 	int			i_collowner;
+	int			i_collencoding;
 
 	query = createPQExpBuffer();
 
@@ -6258,7 +6267,8 @@ getCollations(Archive *fout)
 
 	appendPQExpBufferStr(query, "SELECT tableoid, oid, collname, "
 						 "collnamespace, "
-						 "collowner "
+						 "collowner, "
+						 "collencoding "
 						 "FROM pg_collation");
 
 	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -6272,6 +6282,7 @@ getCollations(Archive *fout)
 	i_collname = PQfnumber(res, "collname");
 	i_collnamespace = PQfnumber(res, "collnamespace");
 	i_collowner = PQfnumber(res, "collowner");
+	i_collencoding = PQfnumber(res, "collencoding");
 
 	for (i = 0; i < ntups; i++)
 	{
@@ -6283,6 +6294,7 @@ getCollations(Archive *fout)
 		collinfo[i].dobj.namespace =
 			findNamespace(atooid(PQgetvalue(res, i, i_collnamespace)));
 		collinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_collowner));
+		collinfo[i].collencoding = atoi(PQgetvalue(res, i, i_collencoding));
 
 		/* Decide whether we want to dump it */
 		selectDumpableObject(&(collinfo[i].dobj), fout);
@@ -6431,6 +6443,7 @@ getOpclasses(Archive *fout)
 	OpclassInfo *opcinfo;
 	int			i_tableoid;
 	int			i_oid;
+	int			i_opcmethod;
 	int			i_opcname;
 	int			i_opcnamespace;
 	int			i_opcowner;
@@ -6440,7 +6453,7 @@ getOpclasses(Archive *fout)
 	 * system-defined opclasses at dump-out time.
 	 */
 
-	appendPQExpBufferStr(query, "SELECT tableoid, oid, opcname, "
+	appendPQExpBufferStr(query, "SELECT tableoid, oid, opcmethod, opcname, "
 						 "opcnamespace, "
 						 "opcowner "
 						 "FROM pg_opclass");
@@ -6453,6 +6466,7 @@ getOpclasses(Archive *fout)
 
 	i_tableoid = PQfnumber(res, "tableoid");
 	i_oid = PQfnumber(res, "oid");
+	i_opcmethod = PQfnumber(res, "opcmethod");
 	i_opcname = PQfnumber(res, "opcname");
 	i_opcnamespace = PQfnumber(res, "opcnamespace");
 	i_opcowner = PQfnumber(res, "opcowner");
@@ -6466,6 +6480,7 @@ getOpclasses(Archive *fout)
 		opcinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opcname));
 		opcinfo[i].dobj.namespace =
 			findNamespace(atooid(PQgetvalue(res, i, i_opcnamespace)));
+		opcinfo[i].opcmethod = atooid(PQgetvalue(res, i, i_opcmethod));
 		opcinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opcowner));
 
 		/* Decide whether we want to dump it */
@@ -6491,6 +6506,7 @@ getOpfamilies(Archive *fout)
 	OpfamilyInfo *opfinfo;
 	int			i_tableoid;
 	int			i_oid;
+	int			i_opfmethod;
 	int			i_opfname;
 	int			i_opfnamespace;
 	int			i_opfowner;
@@ -6502,7 +6518,7 @@ getOpfamilies(Archive *fout)
 	 * system-defined opfamilies at dump-out time.
 	 */
 
-	appendPQExpBufferStr(query, "SELECT tableoid, oid, opfname, "
+	appendPQExpBufferStr(query, "SELECT tableoid, oid, opfmethod, opfname, "
 						 "opfnamespace, "
 						 "opfowner "
 						 "FROM pg_opfamily");
@@ -6516,6 +6532,7 @@ getOpfamilies(Archive *fout)
 	i_tableoid = PQfnumber(res, "tableoid");
 	i_oid = PQfnumber(res, "oid");
 	i_opfname = PQfnumber(res, "opfname");
+	i_opfmethod = PQfnumber(res, "opfmethod");
 	i_opfnamespace = PQfnumber(res, "opfnamespace");
 	i_opfowner = PQfnumber(res, "opfowner");
 
@@ -6528,6 +6545,7 @@ getOpfamilies(Archive *fout)
 		opfinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opfname));
 		opfinfo[i].dobj.namespace =
 			findNamespace(atooid(PQgetvalue(res, i, i_opfnamespace)));
+		opfinfo[i].opfmethod = atooid(PQgetvalue(res, i, i_opfmethod));
 		opfinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_opfowner));
 
 		/* Decide whether we want to dump it */
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 39eef1d..a3e848d 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -258,6 +258,8 @@ typedef struct _oprInfo
 	DumpableObject dobj;
 	const char *rolname;
 	char		oprkind;
+	Oid			oprleft;
+	Oid			oprright;
 	Oid			oprcode;
 } OprInfo;
 
@@ -271,12 +273,14 @@ typedef struct _accessMethodInfo
 typedef struct _opclassInfo
 {
 	DumpableObject dobj;
+	Oid			opcmethod;		/* pg_opclass.opcmethod, an access method OID */
 	const char *rolname;
 } OpclassInfo;
 
 typedef struct _opfamilyInfo
 {
 	DumpableObject dobj;
+	Oid			opfmethod;		/* pg_opfamily.opfmethod, an access method OID */
 	const char *rolname;
 } OpfamilyInfo;
 
@@ -284,6 +288,7 @@ typedef struct _collInfo
 {
 	DumpableObject dobj;
 	const char *rolname;
+	int			collencoding;
 } CollInfo;
 
 typedef struct _convInfo
@@ -757,6 +762,7 @@ extern TableInfo *findTableByOid(Oid oid);
 extern TypeInfo *findTypeByOid(Oid oid);
 extern FuncInfo *findFuncByOid(Oid oid);
 extern OprInfo *findOprByOid(Oid oid);
+extern AccessMethodInfo *findAccessMethodByOid(Oid oid);
 extern CollInfo *findCollationByOid(Oid oid);
 extern NamespaceInfo *findNamespaceByOid(Oid oid);
 extern ExtensionInfo *findExtensionByOid(Oid oid);
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 0b09777..ffae7b3 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -162,6 +162,8 @@ static DumpId postDataBoundId;
 
 
 static int	DOTypeNameCompare(const void *p1, const void *p2);
+static int	pgTypeNameCompare(Oid typid1, Oid typid2);
+static int	accessMethodNameCompare(Oid am1, Oid am2);
 static bool TopoSort(DumpableObject **objs,
 					 int numObjs,
 					 DumpableObject **ordering,
@@ -228,11 +230,24 @@ DOTypeNameCompare(const void *p1, const void *p2)
 	else if (obj2->namespace)
 		return 1;
 
-	/* Sort by name */
+	/*
+	 * Sort by name.  This differs from "Name:" in plain format output, which
+	 * is a _tocEntry.tag.  For example, DumpableObject.name of a constraint
+	 * is pg_constraint.conname, but _tocEntry.tag of a constraint is relname
+	 * and conname joined with a space.
+	 */
 	cmpval = strcmp(obj1->name, obj2->name);
 	if (cmpval != 0)
 		return cmpval;
 
+	/*
+	 * Sort by type.  This helps types that share a type priority without
+	 * sharing a unique name constraint, e.g. opclass and opfamily.
+	 */
+	cmpval = obj1->objType - obj2->objType;
+	if (cmpval != 0)
+		return cmpval;
+
 	/* To have a stable sort order, break ties for some object types */
 	if (obj1->objType == DO_FUNC || obj1->objType == DO_AGG)
 	{
@@ -246,22 +261,10 @@ DOTypeNameCompare(const void *p1, const void *p2)
 			return cmpval;
 		for (i = 0; i < fobj1->nargs; i++)
 		{
-			TypeInfo   *argtype1 = findTypeByOid(fobj1->argtypes[i]);
-			TypeInfo   *argtype2 = findTypeByOid(fobj2->argtypes[i]);
-
-			if (argtype1 && argtype2)
-			{
-				if (argtype1->dobj.namespace && argtype2->dobj.namespace)
-				{
-					cmpval = strcmp(argtype1->dobj.namespace->dobj.name,
-									argtype2->dobj.namespace->dobj.name);
-					if (cmpval != 0)
-						return cmpval;
-				}
-				cmpval = strcmp(argtype1->dobj.name, argtype2->dobj.name);
-				if (cmpval != 0)
-					return cmpval;
-			}
+			cmpval = pgTypeNameCompare(fobj1->argtypes[i],
+									   fobj2->argtypes[i]);
+			if (cmpval != 0)
+				return cmpval;
 		}
 	}
 	else if (obj1->objType == DO_OPERATOR)
@@ -273,6 +276,53 @@ DOTypeNameCompare(const void *p1, const void *p2)
 		cmpval = (oobj2->oprkind - oobj1->oprkind);
 		if (cmpval != 0)
 			return cmpval;
+		/* Within an oprkind, sort by argument type names */
+		cmpval = pgTypeNameCompare(oobj1->oprleft, oobj2->oprleft);
+		if (cmpval != 0)
+			return cmpval;
+		cmpval = pgTypeNameCompare(oobj1->oprright, oobj2->oprright);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_OPCLASS)
+	{
+		OpclassInfo *opcobj1 = *(OpclassInfo *const *) p1;
+		OpclassInfo *opcobj2 = *(OpclassInfo *const *) p2;
+
+		/* Sort by access method name, per pg_opclass_am_name_nsp_index */
+		cmpval = accessMethodNameCompare(opcobj1->opcmethod,
+										 opcobj2->opcmethod);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_OPFAMILY)
+	{
+		OpfamilyInfo *opfobj1 = *(OpfamilyInfo *const *) p1;
+		OpfamilyInfo *opfobj2 = *(OpfamilyInfo *const *) p2;
+
+		/* Sort by access method name, per pg_opfamily_am_name_nsp_index */
+		cmpval = accessMethodNameCompare(opfobj1->opfmethod,
+										 opfobj2->opfmethod);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_COLLATION)
+	{
+		CollInfo   *cobj1 = *(CollInfo *const *) p1;
+		CollInfo   *cobj2 = *(CollInfo *const *) p2;
+
+		/*
+		 * Sort by encoding, per pg_collation_name_enc_nsp_index.  This is
+		 * mostly academic, because CREATE COLLATION has restrictions to make
+		 * (nspname, collname) uniquely identify a collation within a given
+		 * DatabaseEncoding.  pg_import_system_collations() bypasses those
+		 * restrictions, but pg_dump+restore fails after a
+		 * pg_import_system_collations('my_schema') that creates collations
+		 * for a blend of encodings.
+		 */
+		cmpval = cobj1->collencoding - cobj2->collencoding;
+		if (cmpval != 0)
+			return cmpval;
 	}
 	else if (obj1->objType == DO_ATTRDEF)
 	{
@@ -317,11 +367,143 @@ DOTypeNameCompare(const void *p1, const void *p2)
 		if (cmpval != 0)
 			return cmpval;
 	}
+	else if (obj1->objType == DO_CONSTRAINT)
+	{
+		ConstraintInfo *robj1 = *(ConstraintInfo *const *) p1;
+		ConstraintInfo *robj2 = *(ConstraintInfo *const *) p2;
+
+		/*
+		 * DO_CONSTRAINT covers domain constraints too.  Per ConstraintInfo
+		 * in pg_dump.h, those have condomain set and contable == NULL, so
+		 * contable must not be dereferenced unconditionally.  Sort domain
+		 * constraints ahead of table constraints; any fixed choice keeps
+		 * the order independent of OIDs.
+		 */
+		if (robj1->condomain)
+		{
+			if (!robj2->condomain)
+				return -1;
+
+			/* Both on domains: sort by domain name */
+			cmpval = strcmp(robj1->condomain->dobj.name,
+							robj2->condomain->dobj.name);
+			if (cmpval != 0)
+				return cmpval;
+		}
+		else if (robj2->condomain)
+			return 1;
+		else
+		{
+			/* Sort by table name (table namespace was considered already) */
+			cmpval = strcmp(robj1->contable->dobj.name,
+							robj2->contable->dobj.name);
+			if (cmpval != 0)
+				return cmpval;
+		}
+	}
+	else if (obj1->objType == DO_PUBLICATION_REL)
+	{
+		PublicationRelInfo *probj1 = *(PublicationRelInfo *const *) p1;
+		PublicationRelInfo *probj2 = *(PublicationRelInfo *const *) p2;
+
+		/* Sort by publication name, since (namespace, name) match the rel */
+		cmpval = strcmp(probj1->publication->dobj.name,
+						probj2->publication->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	else if (obj1->objType == DO_PUBLICATION_TABLE_IN_SCHEMA)
+	{
+		PublicationSchemaInfo *psobj1 = *(PublicationSchemaInfo *const *) p1;
+		PublicationSchemaInfo *psobj2 = *(PublicationSchemaInfo *const *) p2;
 
-	/* Usually shouldn't get here, but if we do, sort by OID */
+		/* Sort by publication name, since ->name is just nspname */
+		cmpval = strcmp(psobj1->publication->dobj.name,
+						psobj2->publication->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
+
+	/*
+	 * Shouldn't get here except after catalog corruption, but if we do, sort
+	 * by OID.  This may make logically-identical databases differ in the
+	 * order of objects in dump output.  Users will get spurious schema diffs.
+	 * Expect flaky failures of 002_pg_upgrade.pl test 'dump outputs from
+	 * original and restored regression databases match' if the regression
+	 * database contains objects allowing that test to reach here.  That's a
+	 * consequence of the test using "pg_restore -j", which doesn't fully
+	 * constrain OID assignment order.
+	 */
+	Assert(false);
 	return oidcmp(obj1->catId.oid, obj2->catId.oid);
 }
 
+/* Order two pg_type OIDs by nspname, then typname; result is strcmp()-like. */
+static int
+pgTypeNameCompare(Oid typid1, Oid typid2)
+{
+	TypeInfo   *typobj1;
+	TypeInfo   *typobj2;
+	int			cmpval;
+
+	if (typid1 == typid2)		/* includes two InvalidOids; see below */
+		return 0;
+
+	typobj1 = findTypeByOid(typid1);
+	typobj2 = findTypeByOid(typid2);
+
+	if (!typobj1 || !typobj2)
+	{
+		/*
+		 * getTypes() didn't find some OID.  Assume catalog corruption, e.g.
+		 * an oprright value without the corresponding OID in a pg_type row.
+		 * Report as "equal", so the caller uses the next available basis for
+		 * comparison, e.g. the next function argument.
+		 *
+		 * Unary operators have InvalidOid in oprleft, but caller's oprkind
+		 * comparison ensures this function compares a unary oprleft only to
+		 * another unary oprleft.  Hence, "typid1 == typid2" took care of
+		 * InvalidOid.
+		 */
+		Assert(false);
+		return 0;
+	}
+
+	if (!typobj1->dobj.namespace || !typobj2->dobj.namespace)
+		Assert(false);			/* catalog corruption; fall back to typname */
+	else
+	{
+		cmpval = strcmp(typobj1->dobj.namespace->dobj.name,
+						typobj2->dobj.namespace->dobj.name);
+		if (cmpval != 0)
+			return cmpval;
+	}
+	return strcmp(typobj1->dobj.name, typobj2->dobj.name);
+}
+
+/* Order two pg_am OIDs by amname; result is strcmp()-like. */
+static int
+accessMethodNameCompare(Oid am1, Oid am2)
+{
+	AccessMethodInfo *amobj1;
+	AccessMethodInfo *amobj2;
+
+	if (am1 == am2)				/* same access method: no lookup needed */
+		return 0;
+
+	amobj1 = findAccessMethodByOid(am1);
+	amobj2 = findAccessMethodByOid(am2);
+
+	if (!amobj1 || !amobj2)
+	{
+		/* lookup failed; as in pgTypeNameCompare(), report "equal" */
+		Assert(false);
+		return 0;
+	}
+
+	return strcmp(amobj1->dobj.name, amobj2->dobj.name);
+}
+
 
 /*
  * Sort the given objects into a safe dump order using dependency
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
index 3a2eacd..1ec3fa3 100644
--- a/src/test/regress/expected/publication.out
+++ b/src/test/regress/expected/publication.out
@@ -1934,3 +1934,24 @@ RESET client_min_messages;
 RESET SESSION AUTHORIZATION;
 DROP ROLE regress_publication_user, regress_publication_user2;
 DROP ROLE regress_publication_user_dummy;
+-- stage objects for pg_dump tests
+CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int);
+CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int);
+SET client_min_messages = 'ERROR';
+CREATE PUBLICATION dump_pub_qual_1ct FOR
+  TABLE ONLY pubme.t0 (c, d) WHERE (c > 0);
+CREATE PUBLICATION dump_pub_qual_2ct FOR
+  TABLE ONLY pubme.t0 (c) WHERE (c > 0),
+  TABLE ONLY pubme.t1 (c);
+CREATE PUBLICATION dump_pub_nsp_1ct FOR
+  TABLES IN SCHEMA pubme;
+CREATE PUBLICATION dump_pub_nsp_2ct FOR
+  TABLES IN SCHEMA pubme,
+  TABLES IN SCHEMA pubme2;
+CREATE PUBLICATION dump_pub_all FOR
+  TABLE ONLY pubme.t0,
+  TABLE ONLY pubme.t1 WHERE (c < 0),
+  TABLES IN SCHEMA pubme,
+  TABLES IN SCHEMA pubme2
+  WITH (publish_via_partition_root = true);
+RESET client_min_messages;
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
index c9e3091..2585f08 100644
--- a/src/test/regress/sql/publication.sql
+++ b/src/test/regress/sql/publication.sql
@@ -1229,3 +1229,25 @@ RESET client_min_messages;
 RESET SESSION AUTHORIZATION;
 DROP ROLE regress_publication_user, regress_publication_user2;
 DROP ROLE regress_publication_user_dummy;
+
+-- stage objects for pg_dump tests
+CREATE SCHEMA pubme CREATE TABLE t0 (c int, d int) CREATE TABLE t1 (c int);
+CREATE SCHEMA pubme2 CREATE TABLE t0 (c int, d int);
+SET client_min_messages = 'ERROR';
+CREATE PUBLICATION dump_pub_qual_1ct FOR
+  TABLE ONLY pubme.t0 (c, d) WHERE (c > 0);
+CREATE PUBLICATION dump_pub_qual_2ct FOR
+  TABLE ONLY pubme.t0 (c) WHERE (c > 0),
+  TABLE ONLY pubme.t1 (c);
+CREATE PUBLICATION dump_pub_nsp_1ct FOR
+  TABLES IN SCHEMA pubme;
+CREATE PUBLICATION dump_pub_nsp_2ct FOR
+  TABLES IN SCHEMA pubme,
+  TABLES IN SCHEMA pubme2;
+CREATE PUBLICATION dump_pub_all FOR
+  TABLE ONLY pubme.t0,
+  TABLE ONLY pubme.t1 WHERE (c < 0),
+  TABLES IN SCHEMA pubme,
+  TABLES IN SCHEMA pubme2
+  WITH (publish_via_partition_root = true);
+RESET client_min_messages;