From 0e477fdcdf25a1e59163d748df03e570684a1955 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Sat, 27 Feb 2021 09:03:50 +0500
Subject: [PATCH v7 1/9] Allow alternate compression methods for
 wal_compression

TODO: bump XLOG_PAGE_MAGIC
---
 doc/src/sgml/config.sgml                      | 17 +++++
 src/backend/Makefile                          |  2 +-
 src/backend/access/transam/xlog.c             | 10 +++
 src/backend/access/transam/xloginsert.c       | 67 ++++++++++++++++---
 src/backend/access/transam/xlogreader.c       | 64 +++++++++++++++++-
 src/backend/utils/misc/guc.c                  | 11 +++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/access/xlog.h                     |  1 +
 src/include/access/xlog_internal.h            | 16 +++++
 src/include/access/xlogrecord.h               | 11 ++-
 10 files changed, 187 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 7e32b0686c..70effa6345 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3137,6 +3137,23 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-wal-compression-method" xreflabel="wal_compression_method">
+      <term><varname>wal_compressionion_method</varname> (<type>enum</type>)
+      <indexterm>
+       <primary><varname>wal_compression_method</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        This parameter selects the compression method used to compress WAL when
+        <varname>wal_compression</varname> is enabled.
+        The supported methods are pglz and zlib.
+        The default value is <literal>pglz</literal>.
+        Only superusers can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-init-zero" xreflabel="wal_init_zero">
       <term><varname>wal_init_zero</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/src/backend/Makefile b/src/backend/Makefile
index 0da848b1fd..3af216ddfc 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -48,7 +48,7 @@ OBJS = \
 LIBS := $(filter-out -lpgport -lpgcommon, $(LIBS)) $(LDAP_LIBS_BE) $(ICU_LIBS)
 
 # The backend doesn't need everything that's in LIBS, however
-LIBS := $(filter-out -lz -lreadline -ledit -ltermcap -lncurses -lcurses, $(LIBS))
+LIBS := $(filter-out -lreadline -ledit -ltermcap -lncurses -lcurses, $(LIBS))
 
 ifeq ($(with_systemd),yes)
 LIBS += -lsystemd
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8d163f190f..b7f8e12aea 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -99,6 +99,7 @@ bool		EnableHotStandby = false;
 bool		fullPageWrites = true;
 bool		wal_log_hints = false;
 bool		wal_compression = false;
+int			wal_compression_method = WAL_COMPRESSION_PGLZ;
 char	   *wal_consistency_checking_string = NULL;
 bool	   *wal_consistency_checking = NULL;
 bool		wal_init_zero = true;
@@ -180,6 +181,15 @@ const struct config_enum_entry recovery_target_action_options[] = {
 	{NULL, 0, false}
 };
 
+/* Note that due to conditional compilation, offsets within the array are not static */
+const struct config_enum_entry wal_compression_options[] = {
+	{"pglz", WAL_COMPRESSION_PGLZ, false},
+#ifdef  HAVE_LIBZ
+	{"zlib", WAL_COMPRESSION_ZLIB, false},
+#endif
+	{NULL, 0, false}
+};
+
 /*
  * Statistics for current checkpoint are collected in this global struct.
  * Because only the checkpointer or a stand-alone backend can perform
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 32b4cc84e7..5ab07621d6 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -33,8 +33,18 @@
 #include "storage/proc.h"
 #include "utils/memutils.h"
 
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+/* zlib compressBound is not a macro */
+#define ZLIB_MAX_BLCKSZ		BLCKSZ + (BLCKSZ>>12) + (BLCKSZ>>14) + (BLCKSZ>>25) + 13
+#else
+#define ZLIB_MAX_BLCKSZ		0
+#endif
+
 /* Buffer size required to store a compressed version of backup block image */
-#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)
+#define PGLZ_MAX_BLCKSZ		PGLZ_MAX_OUTPUT(BLCKSZ)
+
+#define COMPRESS_BUFSIZE	Max(PGLZ_MAX_BLCKSZ, ZLIB_MAX_BLCKSZ)
 
 /*
  * For each block reference registered with XLogRegisterBuffer, we fill in
@@ -58,7 +68,7 @@ typedef struct
 								 * backup block data in XLogRecordAssemble() */
 
 	/* buffer to store a compressed version of backup block image */
-	char		compressed_page[PGLZ_MAX_BLCKSZ];
+	char		compressed_page[COMPRESS_BUFSIZE];
 } registered_buffer;
 
 static registered_buffer *registered_buffers;
@@ -113,7 +123,8 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
 									   XLogRecPtr RedoRecPtr, bool doPageWrites,
 									   XLogRecPtr *fpw_lsn, int *num_fpi);
 static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
-									uint16 hole_length, char *dest, uint16 *dlen);
+									uint16 hole_length, char *dest,
+									uint16 *dlen, WalCompression compression);
 
 /*
  * Begin constructing a WAL record. This must be called before the
@@ -625,16 +636,26 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 				cbimg.hole_length = 0;
 			}
 
+			bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE;
+
 			/*
 			 * Try to compress a block image if wal_compression is enabled
 			 */
 			if (wal_compression)
 			{
+				int compression;
+				/* The current compression is stored in the WAL record */
+				wal_compression_name(wal_compression_method); /* Range check */
+				compression = walmethods[wal_compression_method].walmethod;
+				Assert(compression < (1 << BKPIMAGE_COMPRESS_BITS));
+				bimg.bimg_info |=
+					compression << BKPIMAGE_COMPRESS_OFFSET_BITS;
 				is_compressed =
 					XLogCompressBackupBlock(page, bimg.hole_offset,
 											cbimg.hole_length,
 											regbuf->compressed_page,
-											&compressed_len);
+											&compressed_len,
+											wal_compression_method);
 			}
 
 			/*
@@ -652,8 +673,6 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 			rdt_datas_last->next = &regbuf->bkp_rdatas[0];
 			rdt_datas_last = rdt_datas_last->next;
 
-			bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE;
-
 			/*
 			 * If WAL consistency checking is enabled for the resource manager
 			 * of this WAL record, a full-page image is included in the record
@@ -827,7 +846,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
  */
 static bool
 XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
-						char *dest, uint16 *dlen)
+						char *dest, uint16 *dlen, WalCompression compression)
 {
 	int32		orig_len = BLCKSZ - hole_length;
 	int32		len;
@@ -853,12 +872,42 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
 	else
 		source = page;
 
+	switch (compression)
+	{
+	case WAL_COMPRESSION_PGLZ:
+		len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
+		break;
+
+#ifdef HAVE_LIBZ
+	case WAL_COMPRESSION_ZLIB:
+		{
+			unsigned long	len_l = COMPRESS_BUFSIZE;
+			int ret;
+			ret = compress2((Bytef*)dest, &len_l, (Bytef*)source, orig_len, 1);
+			if (ret != Z_OK)
+				len_l = -1;
+			len = len_l;
+			break;
+		}
+#endif
+
+	default:
+		/*
+		 * It should be impossible to get here for unsupported algorithms,
+		 * which cannot be assigned if they're not enabled at compile time.
+		 */
+		ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("unknown compression method requested: %d(%s)",
+				 compression, wal_compression_name(compression))));
+
+	}
+
 	/*
-	 * We recheck the actual size even if pglz_compress() reports success and
+	 * We recheck the actual size even if compression reports success and
 	 * see if the number of bytes saved by compression is larger than the
 	 * length of extra data needed for the compressed version of block image.
 	 */
-	len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
 	if (len >= 0 &&
 		len + extra_bytes < orig_len)
 	{
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 42738eb940..0d8830fc50 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -33,6 +33,10 @@
 #include "utils/memutils.h"
 #endif
 
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+
 static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
 			pg_attribute_printf(2, 3);
 static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
@@ -1535,6 +1539,30 @@ XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
 	}
 }
 
+/* This is a mapping indexed by wal_compression */
+// XXX: maybe this is better done as a GUC hook to assign the 1) method; and 2) level
+struct walcompression walmethods[] = {
+	{"pglz",	WAL_COMPRESSION_PGLZ},
+	{"zlib",	WAL_COMPRESSION_ZLIB},
+};
+
+/*
+ * Return a statically allocated string associated with the given compression
+ * method.
+ * This is here to be visible to frontend tools like pg_rewind.
+ */
+const char *
+wal_compression_name(WalCompression compression)
+{
+	/*
+	 * This could index into the guc array, except that it's compiled
+	 * conditionally and unsupported methods are elided.
+	 */
+	if (compression < sizeof(walmethods)/sizeof(*walmethods))
+		return walmethods[compression].name;
+	return "???";
+}
+
 /*
  * Restore a full-page image from a backup block attached to an XLOG record.
  *
@@ -1557,9 +1585,41 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
 	if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
 	{
+		int compression_method = BKPIMAGE_COMPRESSION(bkpb->bimg_info);
 		/* If a backup block image is compressed, decompress it */
-		if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
-							BLCKSZ - bkpb->hole_length, true) < 0)
+		int32 decomp_result = -1;
+		switch (compression_method)
+		{
+		case WAL_COMPRESSION_PGLZ:
+			decomp_result = pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
+							BLCKSZ - bkpb->hole_length, true);
+			break;
+
+#ifdef HAVE_LIBZ
+		case WAL_COMPRESSION_ZLIB:
+		{
+			unsigned long decomp_result_l;
+			decomp_result_l = BLCKSZ - bkpb->hole_length;
+			if (uncompress((Bytef*)tmp.data, &decomp_result_l,
+						(Bytef*)ptr, bkpb->bimg_len) == Z_OK)
+				decomp_result = decomp_result_l;
+			else
+				decomp_result = -1;
+			break;
+		}
+#endif
+
+		default:
+			report_invalid_record(record, "image at %X/%X is compressed with unsupported codec, block %d (%d/%s)",
+								  (uint32) (record->ReadRecPtr >> 32),
+								  (uint32) record->ReadRecPtr,
+								  block_id,
+								  compression_method,
+								  wal_compression_name(compression_method));
+			return false;
+		}
+
+		if (decomp_result < 0)
 		{
 			report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
 								  LSN_FORMAT_ARGS(record->ReadRecPtr),
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee731044b6..99932582ba 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -548,6 +548,7 @@ extern const struct config_enum_entry archive_mode_options[];
 extern const struct config_enum_entry recovery_target_action_options[];
 extern const struct config_enum_entry sync_method_options[];
 extern const struct config_enum_entry dynamic_shared_memory_options[];
+extern const struct config_enum_entry wal_compression_options[];
 
 /*
  * GUC option variables that are exported from this module
@@ -4825,6 +4826,16 @@ static struct config_enum ConfigureNamesEnum[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"wal_compression_method", PGC_SUSET, WAL_SETTINGS,
+			gettext_noop("Set the method used to compress full page images in the WAL."),
+			NULL
+		},
+		&wal_compression_method,
+		WAL_COMPRESSION_PGLZ, wal_compression_options,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"dynamic_shared_memory_type", PGC_POSTMASTER, RESOURCES_MEM,
 			gettext_noop("Selects the dynamic shared memory implementation used."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 6e36e4c2ef..baed4d9228 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -219,6 +219,7 @@
 #wal_log_hints = off			# also do full page writes of non-critical updates
 					# (change requires restart)
 #wal_compression = off			# enable compression of full-page writes
+#wal_compression_method = pglz		# pglz, zlib
 #wal_init_zero = on			# zero-fill new WAL files
 #wal_recycle = on			# recycle WAL files
 #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 77187c12be..2f3886431a 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -117,6 +117,7 @@ extern bool EnableHotStandby;
 extern bool fullPageWrites;
 extern bool wal_log_hints;
 extern bool wal_compression;
+extern int	wal_compression_method;
 extern bool wal_init_zero;
 extern bool wal_recycle;
 extern bool *wal_consistency_checking;
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 26a743b6b6..ceca0f7189 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -324,4 +324,20 @@ extern bool InArchiveRecovery;
 extern bool StandbyMode;
 extern char *recoveryRestoreCommand;
 
+struct walcompression
+{
+	char	*name;
+	int	walmethod;	/* Compression method to be stored in WAL */
+};
+
+extern struct walcompression walmethods[];
+
+typedef enum WalCompression
+{
+	WAL_COMPRESSION_PGLZ,
+	WAL_COMPRESSION_ZLIB,
+} WalCompression;
+
+extern const char *wal_compression_name(WalCompression compression);
+
 #endif							/* XLOG_INTERNAL_H */
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index 80c92a2498..7107cf6186 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -114,7 +114,7 @@ typedef struct XLogRecordBlockHeader
  * present is (BLCKSZ - <length of "hole" bytes>).
  *
  * Additionally, when wal_compression is enabled, we will try to compress full
- * page images using the PGLZ compression algorithm, after removing the "hole".
+ * page images, after removing the "hole".
  * This can reduce the WAL volume, but at some extra cost of CPU spent
  * on the compression during WAL logging. In this case, since the "hole"
  * length cannot be calculated by subtracting the number of page image bytes
@@ -147,6 +147,15 @@ typedef struct XLogRecordBlockImageHeader
 #define BKPIMAGE_IS_COMPRESSED		0x02	/* page image is compressed */
 #define BKPIMAGE_APPLY		0x04	/* page image should be restored during
 									 * replay */
+#define BKPIMAGE_COMPRESS_METHOD1	0x08	/* bits to encode compression method */
+#define BKPIMAGE_COMPRESS_METHOD2	0x10	/* 0=pglz; 1=zlib; */
+
+/* How many bits to shift to extract compression */
+#define	BKPIMAGE_COMPRESS_OFFSET_BITS	3
+/* How many bits are for compression */
+#define	BKPIMAGE_COMPRESS_BITS		2
+/* Extract the compression from the bimg_info */
+#define	BKPIMAGE_COMPRESSION(info)	((info >> BKPIMAGE_COMPRESS_OFFSET_BITS) & ((1<<BKPIMAGE_COMPRESS_BITS) - 1))
 
 /*
  * Extra header information used when page image has "hole" and
-- 
2.17.0

