From 1ecfe0c77335e893b1072cbbf725a1cae3ab902c Mon Sep 17 00:00:00 2001
From: dilipkumar <dilipbalaut@gmail.com>
Date: Fri, 16 Oct 2020 19:56:14 +0530
Subject: [PATCH v24 09/10] new compression method extension for zlib

Dilip Kumar
---
 contrib/Makefile                   |   1 +
 contrib/cmzlib/.gitignore          |   4 +
 contrib/cmzlib/Makefile            |  26 +++++
 contrib/cmzlib/cmzlib--1.0.sql     |  13 +++
 contrib/cmzlib/cmzlib.c            | 157 +++++++++++++++++++++++++++++
 contrib/cmzlib/cmzlib.control      |   5 +
 contrib/cmzlib/expected/cmzlib.out |  53 ++++++++++
 contrib/cmzlib/sql/cmzlib.sql      |  22 ++++
 8 files changed, 281 insertions(+)
 create mode 100644 contrib/cmzlib/.gitignore
 create mode 100644 contrib/cmzlib/Makefile
 create mode 100644 contrib/cmzlib/cmzlib--1.0.sql
 create mode 100644 contrib/cmzlib/cmzlib.c
 create mode 100644 contrib/cmzlib/cmzlib.control
 create mode 100644 contrib/cmzlib/expected/cmzlib.out
 create mode 100644 contrib/cmzlib/sql/cmzlib.sql

diff --git a/contrib/Makefile b/contrib/Makefile
index f27e458482..9e452d8dd0 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -12,6 +12,7 @@ SUBDIRS = \
 		bloom		\
 		btree_gin	\
 		btree_gist	\
+		cmzlib		\
 		citext		\
 		cube		\
 		dblink		\
diff --git a/contrib/cmzlib/.gitignore b/contrib/cmzlib/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/contrib/cmzlib/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/cmzlib/Makefile b/contrib/cmzlib/Makefile
new file mode 100644
index 0000000000..956fbe7cc8
--- /dev/null
+++ b/contrib/cmzlib/Makefile
@@ -0,0 +1,26 @@
+# contrib/cmzlib/Makefile
+
+MODULE_big = cmzlib
+OBJS = \
+	$(WIN32RES) \
+	cmzlib.o
+
+EXTENSION = cmzlib
+DATA = cmzlib--1.0.sql
+PGFILEDESC = "zlib compression method "
+
+SHLIB_LINK += $(filter -lz, $(LIBS))
+
+REGRESS = cmzlib
+
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/cmzlib
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/cmzlib/cmzlib--1.0.sql b/contrib/cmzlib/cmzlib--1.0.sql
new file mode 100644
index 0000000000..41f2f95870
--- /dev/null
+++ b/contrib/cmzlib/cmzlib--1.0.sql
@@ -0,0 +1,13 @@
+/* contrib/cmzlib/cmzlib--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION cmzlib" to load this file. \quit
+
+CREATE FUNCTION zlibhandler(internal)
+RETURNS compression_am_handler
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Compression method
+CREATE ACCESS METHOD zlib TYPE COMPRESSION HANDLER zlibhandler;
+COMMENT ON ACCESS METHOD zlib IS 'zlib compression method';
diff --git a/contrib/cmzlib/cmzlib.c b/contrib/cmzlib/cmzlib.c
new file mode 100644
index 0000000000..686a7c7e0d
--- /dev/null
+++ b/contrib/cmzlib/cmzlib.c
@@ -0,0 +1,157 @@
+/*-------------------------------------------------------------------------
+ *
+ * cmzlib.c
+ *	  zlib compression method
+ *
+ * Copyright (c) 2015-2018, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/cmzlib/cmzlib.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "access/compressamapi.h"
+#include "access/toast_internals.h"
+
+#include "fmgr.h"
+#include "utils/builtins.h"
+
+#include <zlib.h>
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(zlibhandler);
+
+void		_PG_init(void);
+
+/*
+ * Module initialize function.  The body is empty: nothing is set up here.
+ */
+void
+_PG_init(void)
+{
+	/* intentionally empty */
+}
+
+#define ZLIB_MAX_DICTIONARY_LENGTH		32768	/* capacity of zlib_state.dict */
+#define ZLIB_DICTIONARY_DELIM			(" ,")	/* not referenced in this file */
+
+typedef struct
+{
+	int			level;			/* zlib compression level (a Z_* constant) */
+	Bytef		dict[ZLIB_MAX_DICTIONARY_LENGTH];	/* never filled in this file */
+	unsigned int dictlen;		/* presumably bytes used in dict -- TODO confirm */
+} zlib_state;
+
+/*
+ * zlib_cmcompress - compression routine for zlib compression method
+ *
+ * Deflates the payload of value at zlib's default level into a new varlena
+ * with header_size bytes reserved in front of the compressed stream.
+ * Returns NULL if the result would exceed the input size; ERROR on failure.
+ */
+static struct varlena *
+zlib_cmcompress(const struct varlena *value, int32 header_size)
+{
+	int32		valsize = VARSIZE_ANY_EXHDR(value);
+	int32		len;
+	struct varlena *tmp;
+	z_streamp	zp;
+	int			res;
+
+	zp = (z_streamp) palloc(sizeof(z_stream));
+	zp->zalloc = Z_NULL;
+	zp->zfree = Z_NULL;
+	zp->opaque = Z_NULL;
+
+	if (deflateInit(zp, Z_DEFAULT_COMPRESSION) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zp->msg);
+
+	tmp = (struct varlena *) palloc(valsize + header_size);
+	zp->next_in = (void *) VARDATA_ANY(value);
+	zp->avail_in = valsize;
+	zp->next_out = (void *) ((char *) tmp + header_size);
+	zp->avail_out = valsize;
+
+	/*
+	 * All input and output space is supplied up front, so a single Z_FINISH
+	 * call either ends the stream or proves the result exceeds valsize bytes.
+	 */
+	res = deflate(zp, Z_FINISH);
+	if (res == Z_STREAM_ERROR)
+	{
+		(void) deflateEnd(zp);	/* free zlib's own memory before erroring out */
+		elog(ERROR, "could not compress data: %s", zp->msg);
+	}
+	len = valsize - zp->avail_out;
+
+	/* deflateEnd() reports Z_DATA_ERROR when an unfinished stream is dropped */
+	if (deflateEnd(zp) != Z_OK && res == Z_STREAM_END)
+		elog(ERROR, "could not close compression stream: %s", zp->msg);
+	pfree(zp);
+
+	if (res != Z_STREAM_END)
+	{
+		pfree(tmp);
+		return NULL;
+	}
+
+	SET_VARSIZE_COMPRESSED(tmp, len + header_size);
+	return tmp;
+}
+
+/*
+ * zlib_cmdecompress - decompression routine for zlib compression method
+ *
+ * Returns the decompressed varlena; raises ERROR if the stream is damaged.
+ */
+static struct varlena *
+zlib_cmdecompress(const struct varlena *value, int32 header_size)
+{
+	uint32		rawsize = VARRAWSIZE_4B_C(value);
+	struct varlena *result;
+	z_streamp	zp;
+
+	zp = (z_streamp) palloc(sizeof(z_stream));
+	zp->zalloc = Z_NULL;
+	zp->zfree = Z_NULL;
+	zp->opaque = Z_NULL;
+	zp->next_in = (void *) ((char *) value + header_size);	/* set before init */
+	zp->avail_in = VARSIZE(value) - header_size;
+
+	if (inflateInit(zp) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zp->msg);
+
+	result = (struct varlena *) palloc(rawsize + VARHDRSZ);
+	SET_VARSIZE(result, rawsize + VARHDRSZ);
+	zp->next_out = (void *) VARDATA(result);
+	zp->avail_out = rawsize;
+
+	/* One Z_FINISH step must end the stream and fill result exactly. */
+	if (inflate(zp, Z_FINISH) != Z_STREAM_END || zp->avail_out != 0)
+	{
+		(void) inflateEnd(zp);	/* free zlib's own memory before erroring out */
+		elog(ERROR, "could not uncompress data: %s",
+			 zp->msg ? zp->msg : "corrupt or truncated stream");
+	}
+
+	if (inflateEnd(zp) != Z_OK)
+		elog(ERROR, "could not close compression library: %s", zp->msg);
+
+	pfree(zp);
+	return result;
+}
+
+const CompressionAmRoutine zlib_compress_methods = {	/* returned by zlibhandler() */
+	.type = T_CompressionAmRoutine,
+	.datum_compress = zlib_cmcompress,
+	.datum_decompress = zlib_cmdecompress,
+	.datum_decompress_slice = NULL};	/* no slice decompression routine */
+
+Datum
+zlibhandler(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_POINTER(&zlib_compress_methods);	/* file-level routine table */
+}
diff --git a/contrib/cmzlib/cmzlib.control b/contrib/cmzlib/cmzlib.control
new file mode 100644
index 0000000000..2eb10f3a83
--- /dev/null
+++ b/contrib/cmzlib/cmzlib.control
@@ -0,0 +1,5 @@
+# cmzlib extension
+comment = 'cmzlib compression method'
+default_version = '1.0'
+module_pathname = '$libdir/cmzlib'
+relocatable = true
diff --git a/contrib/cmzlib/expected/cmzlib.out b/contrib/cmzlib/expected/cmzlib.out
new file mode 100644
index 0000000000..2b6fac7e0b
--- /dev/null
+++ b/contrib/cmzlib/expected/cmzlib.out
@@ -0,0 +1,53 @@
+CREATE EXTENSION cmzlib;
+-- zlib compression
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION pglz);
+INSERT INTO zlibtest VALUES(repeat('1234567890',1004));
+INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004));
+SELECT length(f1) FROM zlibtest;
+ length 
+--------
+  10040
+  24096
+(2 rows)
+
+-- alter compression method with rewrite
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION pglz;
+\d+ zlibtest
+                                       Table "public.zlibtest"
+ Column | Type | Collation | Nullable | Default | Storage  | Compression | Stats target | Description 
+--------+------+-----------+----------+---------+----------+-------------+--------------+-------------
+ f1     | text |           |          |         | extended | pglz        |              | 
+
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION zlib;
+\d+ zlibtest
+                                       Table "public.zlibtest"
+ Column | Type | Collation | Nullable | Default | Storage  | Compression | Stats target | Description 
+--------+------+-----------+----------+---------+----------+-------------+--------------+-------------
+ f1     | text |           |          |         | extended | zlib        |              | 
+
+-- preserve old compression method
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION pglz PRESERVE (zlib);
+INSERT INTO zlibtest VALUES (repeat('1234567890',1004));
+\d+ zlibtest
+                                       Table "public.zlibtest"
+ Column | Type | Collation | Nullable | Default | Storage  | Compression | Stats target | Description 
+--------+------+-----------+----------+---------+----------+-------------+--------------+-------------
+ f1     | text |           |          |         | extended | pglz        |              | 
+
+SELECT pg_column_compression(f1) FROM zlibtest;
+ pg_column_compression 
+-----------------------
+ zlib
+ zlib
+ pglz
+(3 rows)
+
+SELECT length(f1) FROM zlibtest;
+ length 
+--------
+  10040
+  24096
+  10040
+(3 rows)
+
+DROP TABLE zlibtest;
diff --git a/contrib/cmzlib/sql/cmzlib.sql b/contrib/cmzlib/sql/cmzlib.sql
new file mode 100644
index 0000000000..ea8d206625
--- /dev/null
+++ b/contrib/cmzlib/sql/cmzlib.sql
@@ -0,0 +1,22 @@
+CREATE EXTENSION cmzlib;
+
+-- zlib compression
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION pglz);
+INSERT INTO zlibtest VALUES(repeat('1234567890',1004));
+INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004));
+SELECT length(f1) FROM zlibtest;
+
+-- alter compression method with rewrite
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION pglz;
+\d+ zlibtest
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION zlib;
+\d+ zlibtest
+
+-- preserve old compression method
+ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION pglz PRESERVE (zlib);
+INSERT INTO zlibtest VALUES (repeat('1234567890',1004));
+\d+ zlibtest
+SELECT pg_column_compression(f1) FROM zlibtest;
+SELECT length(f1) FROM zlibtest;
+
+DROP TABLE zlibtest;
-- 
2.17.0

