From 277d969c31349b22e2c7726935d681e72aacaece Mon Sep 17 00:00:00 2001
From: John Naylor <jcnaylor@gmail.com>
Date: Fri, 25 Jan 2019 17:18:28 -0500
Subject: [PATCH v19 3/3] During pg_upgrade, conditionally skip transfer of
 FSMs.

If a heap or TOAST relation on the old cluster has 4 pages or fewer,
don't copy or link its FSM when upgrading from version 11 or earlier to
version 12 or later. This will reduce space usage for installations
with large numbers of small tables.
---
 src/bin/pg_upgrade/info.c        | 18 ++++++-
 src/bin/pg_upgrade/pg_upgrade.h  |  6 +++
 src/bin/pg_upgrade/relfilenode.c | 84 +++++++++++++++++++++++++++-----
 3 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 2f925f086c..55d4911d10 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -200,6 +200,8 @@ create_rel_filename_map(const char *old_data, const char *new_data,
 
 	map->old_db_oid = old_db->db_oid;
 	map->new_db_oid = new_db->db_oid;
+	map->relpages = old_rel->relpages;
+	map->relkind = old_rel->relkind;
 
 	/*
 	 * old_relfilenode might differ from pg_class.oid (and hence
@@ -415,9 +417,11 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	int			ntups;
 	int			relnum;
 	int			num_rels = 0;
+	int			relpages;
 	char	   *nspname = NULL;
 	char	   *relname = NULL;
 	char	   *tablespace = NULL;
+	char	   *relkind = NULL;
 	int			i_spclocation,
 				i_nspname,
 				i_relname,
@@ -425,7 +429,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 				i_indtable,
 				i_toastheap,
 				i_relfilenode,
-				i_reltablespace;
+				i_reltablespace,
+				i_relpages,
+				i_relkind;
 	char		query[QUERY_ALLOC];
 	char	   *last_namespace = NULL,
 			   *last_tablespace = NULL;
@@ -494,7 +500,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	 */
 	snprintf(query + strlen(query), sizeof(query) - strlen(query),
 			 "SELECT all_rels.*, n.nspname, c.relname, "
-			 "  c.relfilenode, c.reltablespace, %s "
+			 "  c.relfilenode, c.reltablespace, c.relpages, c.relkind, %s "
 			 "FROM (SELECT * FROM regular_heap "
 			 "      UNION ALL "
 			 "      SELECT * FROM toast_heap "
@@ -526,6 +532,8 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 	i_relfilenode = PQfnumber(res, "relfilenode");
 	i_reltablespace = PQfnumber(res, "reltablespace");
 	i_spclocation = PQfnumber(res, "spclocation");
+	i_relpages = PQfnumber(res, "relpages");
+	i_relkind = PQfnumber(res, "relkind");
 
 	for (relnum = 0; relnum < ntups; relnum++)
 	{
@@ -555,6 +563,12 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
 		relname = PQgetvalue(res, relnum, i_relname);
 		curr->relname = pg_strdup(relname);
 
+		relpages = atoi(PQgetvalue(res, relnum, i_relpages));
+		curr->relpages = relpages;
+
+		relkind = PQgetvalue(res, relnum, i_relkind);
+		curr->relkind = relkind[0];
+
 		curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
 		curr->tblsp_alloc = false;
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 2f67eee22b..baeb8ff0f8 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -147,6 +147,8 @@ typedef struct
 	char	   *tablespace;		/* tablespace path; "" for cluster default */
 	bool		nsp_alloc;		/* should nspname be freed? */
 	bool		tblsp_alloc;	/* should tablespace be freed? */
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
 } RelInfo;
 
 typedef struct
@@ -173,6 +175,10 @@ typedef struct
 	 */
 	Oid			old_relfilenode;
 	Oid			new_relfilenode;
+
+	int32		relpages;		/* # of pages -- see pg_class.h */
+	char		relkind;		/* relation kind -- see pg_class.h */
+
 	/* the rest are used only for logging and error reporting */
 	char	   *nspname;		/* namespaces */
 	char	   *relname;
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index 0c78073f0e..ffcec23d7c 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -14,10 +14,12 @@
 #include <sys/stat.h>
 #include "catalog/pg_class_d.h"
 #include "access/transam.h"
+#include "storage/freespace.h"
 
 
 static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static Size transfer_relfile(FileNameMap *map, const char *suffix, bool vm_must_add_frozenbit);
+static bool new_cluster_needs_fsm(char relkind, Size first_seg_size);
 
 
 /*
@@ -144,6 +146,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 	int			mapnum;
 	bool		vm_crashsafe_match = true;
 	bool		vm_must_add_frozenbit = false;
+	Size		first_seg_size = 0;
 
 	/*
 	 * Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -165,18 +168,22 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
 		if (old_tablespace == NULL ||
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
-			/* transfer primary file */
-			transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
+			/* Transfer main fork and remember the size of its first segment. */
+			first_seg_size = transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
 			{
 				/*
-				 * Copy/link any fsm and vm files, if they exist
+				 * Transfer any FSM files if they would be created in the
+				 * new cluster.
 				 */
-				transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+				if (new_cluster_needs_fsm(maps[mapnum].relkind, first_seg_size))
+					(void) transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
+
+				/* Transfer any VM files if we can trust their contents. */
 				if (vm_crashsafe_match)
-					transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
+					(void) transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
 			}
 		}
 	}
@@ -188,9 +195,10 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
  *
  * Copy or link file from old cluster to new one.  If vm_must_add_frozenbit
  * is true, visibility map forks are converted and rewritten, even in link
- * mode.
+ * mode.  Returns the size in bytes of the main fork's first segment (zero
+ * for other forks); the size is only verified for small heap relations.
  */
-static void
+static Size
 transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit)
 {
 	char		old_file[MAXPGPATH];
@@ -198,6 +206,7 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 	int			segno;
 	char		extent_suffix[65];
 	struct stat statbuf;
+	Size		first_seg_size = 0;
 
 	/*
 	 * Now copy/link any related segments as well. Remember, PG breaks large
@@ -234,16 +243,40 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 			{
 				/* File does not exist?  That's OK, just return */
 				if (errno == ENOENT)
-					return;
+					return first_seg_size;
 				else
-					pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
-							 map->nspname, map->relname, old_file, new_file,
-							 strerror(errno));
+					goto fatal;
 			}
 
 			/* If file is empty, just return */
 			if (statbuf.st_size == 0)
-				return;
+				return first_seg_size;
+		}
+
+		/* Save size of the first segment of the main fork. */
+
+		else if (map->relpages <= HEAP_FSM_CREATION_THRESHOLD &&
+				 (map->relkind == RELKIND_RELATION ||
+				  map->relkind == RELKIND_TOASTVALUE))
+		{
+			/*
+			 * pg_class.relpages says this heap is small, but if that value
+			 * is wrong an FSM could be skipped when we actually need it.  To
+			 * guard against this, we verify the size of the first segment.
+			 */
+			if (stat(old_file, &statbuf) != 0)
+				goto fatal;
+			else
+				first_seg_size = statbuf.st_size;
+		}
+		else
+		{
+			/*
+			 * For indexes etc., we don't care if pg_class.relpages is wrong,
+			 * since we always transfer their FSMs.  For heaps, we might
+			 * transfer an FSM when we don't need to, but this is harmless.
+			 */
+			first_seg_size = Min(map->relpages, RELSEG_SIZE) * BLCKSZ;
 		}
 
 		unlink(new_file);
@@ -277,4 +310,29 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 					linkFile(old_file, new_file, map->nspname, map->relname);
 			}
 	}
+
+fatal:
+	pg_fatal("error while checking for file existence \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+			 map->nspname, map->relname, old_file, new_file,
+			 strerror(errno));
+}
+
+/*
+ * Return false only for small heap/TOAST relations when upgrading from <= 11
+ * to >= 12, the first version where they don't have FSMs by default.
+ */
+static bool
+new_cluster_needs_fsm(char relkind, Size first_seg_size)
+{
+	if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE)
+		return true;
+
+	if (first_seg_size > HEAP_FSM_CREATION_THRESHOLD * BLCKSZ)
+		return true;
+
+	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100 &&
+		GET_MAJOR_VERSION(new_cluster.major_version) >= 1200)
+		return false;
+	else
+		return true;
 }
-- 
2.17.1

