From 7bfc811c475e16f449ed29ee395d84d77166f047 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Fri, 19 Nov 2021 01:05:43 +0000
Subject: [PATCH v15 2/3] Add test archive module.

---
 contrib/Makefile                              |   1 +
 contrib/basic_archive/.gitignore              |   4 +
 contrib/basic_archive/Makefile                |  20 ++
 contrib/basic_archive/basic_archive.c         | 288 ++++++++++++++++++
 contrib/basic_archive/basic_archive.conf      |   3 +
 .../basic_archive/expected/basic_archive.out  |  29 ++
 contrib/basic_archive/sql/basic_archive.sql   |  22 ++
 7 files changed, 367 insertions(+)
 create mode 100644 contrib/basic_archive/.gitignore
 create mode 100644 contrib/basic_archive/Makefile
 create mode 100644 contrib/basic_archive/basic_archive.c
 create mode 100644 contrib/basic_archive/basic_archive.conf
 create mode 100644 contrib/basic_archive/expected/basic_archive.out
 create mode 100644 contrib/basic_archive/sql/basic_archive.sql

diff --git a/contrib/Makefile b/contrib/Makefile
index 87bf87ab90..e3e221308b 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -9,6 +9,7 @@ SUBDIRS = \
 		amcheck		\
 		auth_delay	\
 		auto_explain	\
+		basic_archive	\
 		bloom		\
 		btree_gin	\
 		btree_gist	\
diff --git a/contrib/basic_archive/.gitignore b/contrib/basic_archive/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/contrib/basic_archive/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/basic_archive/Makefile b/contrib/basic_archive/Makefile
new file mode 100644
index 0000000000..14d036e1c4
--- /dev/null
+++ b/contrib/basic_archive/Makefile
@@ -0,0 +1,20 @@
+# contrib/basic_archive/Makefile
+
+MODULES = basic_archive
+PGFILEDESC = "basic_archive - basic archive module"
+
+REGRESS = basic_archive
+REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/basic_archive/basic_archive.conf
+
+NO_INSTALLCHECK = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/basic_archive
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
new file mode 100644
index 0000000000..faba349fd8
--- /dev/null
+++ b/contrib/basic_archive/basic_archive.c
@@ -0,0 +1,288 @@
+/*-------------------------------------------------------------------------
+ *
+ * basic_archive.c
+ *
+ * This file demonstrates a basic archive library implementation that is
+ * roughly equivalent to the following shell command:
+ *
+ * 		test ! -f /path/to/dest && cp /path/to/src /path/to/dest
+ *
+ * One notable difference between this module and the shell command above
+ * is that this module first copies the file to a temporary destination,
+ * syncs it to disk, and then durably moves it to the final destination.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/basic_archive/basic_archive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "miscadmin.h"
+#include "postmaster/pgarch.h"
+#include "storage/copydir.h"
+#include "storage/fd.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+void _PG_init(void);
+void _PG_archive_module_init(ArchiveModuleCallbacks *cb);
+
+static char *archive_directory = NULL;
+
+static bool basic_archive_configured(void);
+static bool basic_archive_file(const char *file, const char *path);
+static bool check_archive_directory(char **newval, void **extra, GucSource source);
+static bool compare_files(const char *file1, const char *file2);
+
+/*
+ * _PG_init
+ *
+ * Defines the module's GUC.
+ */
+void
+_PG_init(void)
+{
+	DefineCustomStringVariable("basic_archive.archive_directory",
+							   gettext_noop("Archive file destination directory."),
+							   NULL,
+							   &archive_directory,
+							   "",
+							   PGC_SIGHUP,
+							   0,
+							   check_archive_directory, NULL, NULL);
+
+	EmitWarningsOnPlaceholders("basic_archive");
+}
+
+/*
+ * _PG_archive_module_init
+ *
+ * Returns the module's archiving callbacks.
+ */
+void
+_PG_archive_module_init(ArchiveModuleCallbacks *cb)
+{
+	AssertVariableIsOfType(&_PG_archive_module_init, ArchiveModuleInit);
+
+	cb->check_configured_cb = basic_archive_configured;
+	cb->archive_file_cb = basic_archive_file;
+}
+
+/*
+ * check_archive_directory
+ *
+ * Checks that the provided archive directory exists.
+ */
+static bool
+check_archive_directory(char **newval, void **extra, GucSource source)
+{
+	struct stat st;
+
+	/*
+	 * The default value is an empty string, so we have to accept that value.
+	 * Our check_configured callback also checks for this and prevents archiving
+	 * from proceeding if it is still empty.
+	 */
+	if (*newval == NULL || *newval[0] == '\0')
+		return true;
+
+	/*
+	 * Make sure the file paths won't be too long.  The docs indicate that the
+	 * file names to be archived can be up to 64 characters long.
+	 */
+	if (strlen(*newval) + 64 + 2 >= MAXPGPATH)
+	{
+		GUC_check_errdetail("archive directory too long");
+		return false;
+	}
+
+	/*
+	 * Do a basic sanity check that the specified archive directory exists.  It
+	 * could be removed at some point in the future, so we still need to be
+	 * prepared for it not to exist in the actual archiving logic.
+	 */
+	if (stat(*newval, &st) != 0 || !S_ISDIR(st.st_mode))
+	{
+		GUC_check_errdetail("specified archive directory does not exist");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * basic_archive_configured
+ *
+ * Checks that archive_directory is not blank.
+ */
+static bool
+basic_archive_configured(void)
+{
+	return archive_directory != NULL && archive_directory[0] != '\0';
+}
+
+/*
+ * basic_archive_file
+ *
+ * Archives one file.
+ */
+static bool
+basic_archive_file(const char *file, const char *path)
+{
+	char		destination[MAXPGPATH];
+	char		temp[MAXPGPATH + 64];
+	struct stat st;
+
+	ereport(DEBUG3,
+			(errmsg("archiving \"%s\" via basic_archive", file)));
+
+	snprintf(destination, MAXPGPATH, "%s/%s", archive_directory, file);
+
+	/*
+	 * First, check if the file has already been archived.  If it already exists
+	 * and has the same contents as the file we're trying to archive, we can
+	 * return success (after ensuring the file is persisted to disk). This
+	 * scenario is possible if the server crashed after archiving the file but
+	 * before renaming its .ready file to .done.
+	 *
+	 * If the archive file already exists but has different contents, something
+	 * might be wrong, so we just fail.
+	 */
+	if (stat(destination, &st) == 0)
+	{
+		if (compare_files(path, destination))
+		{
+			ereport(DEBUG3,
+					(errmsg("archive file \"%s\" already exists with identical contents",
+							destination)));
+
+			fsync_fname(destination, false);
+			fsync_fname(archive_directory, true);
+
+			return true;
+		}
+
+		ereport(WARNING,
+				(errmsg("archive file \"%s\" already exists", destination)));
+		return false;
+	}
+	else if (errno != ENOENT)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not stat file \"%s\": %m", destination)));
+
+	/*
+	 * Pick a sufficiently random name for the temporary file so that a
+	 * collision is unlikely.  This helps avoid problems in case a temporary
+	 * file was left around after a crash or another server happens to be
+	 * archiving to the same directory.
+	 */
+	snprintf(temp, sizeof(temp), "%s/%s.%s.%d.%d", archive_directory,
+			 "archtemp", file, MyProcPid, (int) (random() & 0x7fff));
+
+	/*
+	 * Copy the file to its temporary destination.  Note that this will fail if
+	 * temp already exists.
+	 */
+	copy_file(unconstify(char *, path), temp);
+
+	/*
+	 * Sync the temporary file to disk and move it to its final destination.
+	 * This will fail if destination already exists.
+	 */
+	(void) durable_rename_excl(temp, destination, ERROR);
+
+	ereport(DEBUG1,
+			(errmsg("archived \"%s\" via basic_archive", file)));
+
+	return true;
+}
+
+/*
+ * compare_files
+ *
+ * Returns whether the contents of the files are the same.
+ */
+static bool
+compare_files(const char *file1, const char *file2)
+{
+#define CMP_BUF_SIZE (4096)
+	char		buf1[CMP_BUF_SIZE];
+	char		buf2[CMP_BUF_SIZE];
+	int			fd1;
+	int			fd2;
+	bool		ret = true;
+
+	fd1 = OpenTransientFile(file1, O_RDONLY | PG_BINARY);
+	if (fd1 < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", file1)));
+
+	fd2 = OpenTransientFile(file2, O_RDONLY | PG_BINARY);
+	if (fd2 < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", file2)));
+
+	for (;;)
+	{
+		int		nbytes = 0;
+		int		buf1_len = 0;
+		int		buf2_len = 0;
+
+		while (buf1_len < CMP_BUF_SIZE)
+		{
+			nbytes = read(fd1, buf1 + buf1_len, CMP_BUF_SIZE - buf1_len);
+			if (nbytes < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", file1)));
+			else if (nbytes == 0)
+				break;
+
+			buf1_len += nbytes;
+		}
+
+		while (buf2_len < CMP_BUF_SIZE)
+		{
+			nbytes = read(fd2, buf2 + buf2_len, CMP_BUF_SIZE - buf2_len);
+			if (nbytes < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not read file \"%s\": %m", file2)));
+			else if (nbytes == 0)
+				break;
+
+			buf2_len += nbytes;
+		}
+
+		if (buf1_len != buf2_len || memcmp(buf1, buf2, buf1_len) != 0)
+		{
+			ret = false;
+			break;
+		}
+		else if (buf1_len == 0)
+			break;
+	}
+
+	if (CloseTransientFile(fd1) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", file1)));
+
+	if (CloseTransientFile(fd2) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", file2)));
+
+	return ret;
+}
diff --git a/contrib/basic_archive/basic_archive.conf b/contrib/basic_archive/basic_archive.conf
new file mode 100644
index 0000000000..b26b2d4144
--- /dev/null
+++ b/contrib/basic_archive/basic_archive.conf
@@ -0,0 +1,3 @@
+archive_mode = 'on'
+archive_library = 'basic_archive'
+basic_archive.archive_directory = '.'
diff --git a/contrib/basic_archive/expected/basic_archive.out b/contrib/basic_archive/expected/basic_archive.out
new file mode 100644
index 0000000000..0015053e0f
--- /dev/null
+++ b/contrib/basic_archive/expected/basic_archive.out
@@ -0,0 +1,29 @@
+CREATE TABLE test (a INT);
+SELECT 1 FROM pg_switch_wal();
+ ?column? 
+----------
+        1
+(1 row)
+
+DO $$
+DECLARE
+	archived bool;
+	loops int := 0;
+BEGIN
+	LOOP
+		archived := count(*) > 0 FROM pg_ls_dir('.', false, false) a
+			WHERE a ~ '^[0-9A-F]{24}$';
+		IF archived OR loops > 120 * 10 THEN EXIT; END IF;
+		PERFORM pg_sleep(0.1);
+		loops := loops + 1;
+	END LOOP;
+END
+$$;
+SELECT count(*) > 0 FROM pg_ls_dir('.', false, false) a
+	WHERE a ~ '^[0-9A-F]{24}$';
+ ?column? 
+----------
+ t
+(1 row)
+
+DROP TABLE test;
diff --git a/contrib/basic_archive/sql/basic_archive.sql b/contrib/basic_archive/sql/basic_archive.sql
new file mode 100644
index 0000000000..14e236d57a
--- /dev/null
+++ b/contrib/basic_archive/sql/basic_archive.sql
@@ -0,0 +1,22 @@
+CREATE TABLE test (a INT);
+SELECT 1 FROM pg_switch_wal();
+
+DO $$
+DECLARE
+	archived bool;
+	loops int := 0;
+BEGIN
+	LOOP
+		archived := count(*) > 0 FROM pg_ls_dir('.', false, false) a
+			WHERE a ~ '^[0-9A-F]{24}$';
+		IF archived OR loops > 120 * 10 THEN EXIT; END IF;
+		PERFORM pg_sleep(0.1);
+		loops := loops + 1;
+	END LOOP;
+END
+$$;
+
+SELECT count(*) > 0 FROM pg_ls_dir('.', false, false) a
+	WHERE a ~ '^[0-9A-F]{24}$';
+
+DROP TABLE test;
-- 
2.25.1

