From e51539968dec49235cf8579afb142a6fb3529967 Mon Sep 17 00:00:00 2001
From: Ildus Kurbangaliev <i.kurbangaliev@gmail.com>
Date: Mon, 18 Jun 2018 16:00:43 +0300
Subject: [PATCH 8/8] Add documentation for custom compression methods

Signed-off-by: Ildus Kurbangaliev <i.kurbangaliev@gmail.com>
---
 doc/src/sgml/catalogs.sgml                 |  19 ++-
 doc/src/sgml/compression-am.sgml           | 178 +++++++++++++++++++++
 doc/src/sgml/filelist.sgml                 |   1 +
 doc/src/sgml/indexam.sgml                  |   2 +-
 doc/src/sgml/postgres.sgml                 |   1 +
 doc/src/sgml/ref/alter_table.sgml          |  18 +++
 doc/src/sgml/ref/create_access_method.sgml |   5 +-
 doc/src/sgml/ref/create_table.sgml         |  13 ++
 doc/src/sgml/storage.sgml                  |   6 +-
 9 files changed, 236 insertions(+), 7 deletions(-)
 create mode 100644 doc/src/sgml/compression-am.sgml

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 0fd792ff1a..c8551c2fc3 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -57,7 +57,7 @@
 
      <row>
       <entry><link linkend="catalog-pg-am"><structname>pg_am</structname></link></entry>
-      <entry>index access methods</entry>
+      <entry>access methods</entry>
      </row>
 
      <row>
@@ -70,6 +70,11 @@
       <entry>access method support functions</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-attr-compression"><structname>pg_attr_compression</structname></link></entry>
+      <entry>table column compression relationships and options</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-attrdef"><structname>pg_attrdef</structname></link></entry>
       <entry>column default values</entry>
@@ -587,8 +592,10 @@
    The catalog <structname>pg_am</structname> stores information about
    relation access methods.  There is one row for each access method supported
    by the system.
-   Currently, only indexes have access methods.  The requirements for index
-   access methods are discussed in detail in <xref linkend="indexam"/>.
+   Currently, compression and index access methods are supported.
+   The requirements for index access methods are discussed in detail
+   in <xref linkend="indexam"/>; those for compression access methods
+   can be found in <xref linkend="compression-am"/>.
   </para>
 
   <table>
@@ -892,6 +899,12 @@
 
  </sect1>
 
+ <sect1 id="catalog-pg-attr-compression">
+  <title><structname>pg_attr_compression</structname></title>
+  <indexterm zone="catalog-pg-attr-compression">
+   <primary>pg_attr_compression</primary>
+  </indexterm>
+ </sect1>
 
  <sect1 id="catalog-pg-attrdef">
   <title><structname>pg_attrdef</structname></title>
diff --git a/doc/src/sgml/compression-am.sgml b/doc/src/sgml/compression-am.sgml
new file mode 100644
index 0000000000..e23d817910
--- /dev/null
+++ b/doc/src/sgml/compression-am.sgml
@@ -0,0 +1,178 @@
+<!-- doc/src/sgml/compression-am.sgml -->
+
+<chapter id="compression-am">
+ <title>Compression Access Methods</title>
+  <para>
+   <productname>PostgreSQL</productname> provides two
+   built-in compression methods (<literal>pglz</literal>
+   and <literal>zlib</literal>), and also allows custom compression
+   methods to be added through the compression access method interface.
+  </para>
+
+ <sect1 id="builtin-compression-methods">
+  <title>Built-in Compression Access Methods</title>
+  <para>
+   These compression access methods are included in
+   <productname>PostgreSQL</productname> and don't need any external extensions.
+  </para>
+  <table id="builtin-compression-methods-table">
+   <title>Built-in Compression Access Methods</title>
+   <tgroup cols="2">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Options</entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry><literal>pglz</literal></entry>
+      <entry>
+       <literal>min_input_size (int)</literal>,
+       <literal>max_input_size (int)</literal>,
+       <literal>min_comp_rate (int)</literal>,
+       <literal>first_success_by (int)</literal>,
+       <literal>match_size_good (int)</literal>,
+       <literal>match_size_drop (int)</literal>
+      </entry>
+     </row>
+     <row>
+      <entry><literal>zlib</literal></entry>
+      <entry><literal>level (text)</literal>, <literal>dict (text)</literal></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+  <para>
+  Note that for <literal>zlib</literal> to work, the zlib library must be
+  installed on the system and <productname>PostgreSQL</productname> must be
+  built without the <literal>--without-zlib</literal> flag.
+  </para>
+ </sect1>
+
+ <sect1 id="compression-api">
+  <title>Basic API for Compression Methods</title>
+
+  <para>
+   Each compression access method is described by a row in the
+   <link linkend="catalog-pg-am"><structname>pg_am</structname></link>
+   system catalog.  The <structname>pg_am</structname> entry
+   specifies a name and a <firstterm>handler function</firstterm> for the access
+   method.  These entries can be created and deleted using the
+   <xref linkend="sql-create-access-method"/> and
+   <xref linkend="sql-drop-access-method"/> SQL commands.
+  </para>
+
+  <para>
+   A compression access method handler function must be declared to accept a
+   single argument of type <type>internal</type> and to return the
+   pseudo-type <type>compression_am_handler</type>.  The argument is a dummy value that
+   simply serves to prevent handler functions from being called directly from
+   SQL commands.  The result of the function must be a palloc'd struct of
+   type <structname>CompressionAmRoutine</structname>, which contains everything
+   that the core code needs to know to make use of the compression access method.
+   The <structname>CompressionAmRoutine</structname> struct, also called the access
+   method's <firstterm>API struct</firstterm>, contains pointers to support
+   functions for the access method. These support functions are plain C
+   functions and are not visible or callable at the SQL level.
+   The support functions are described in <xref linkend="compression-am-functions"/>.
+  </para>
+
+  <para>
+   The structure <structname>CompressionAmRoutine</structname> is defined thus:
+<programlisting>
+typedef struct CompressionAmRoutine
+{
+    NodeTag     type;
+
+    cmcheck_function        cmcheck;        /* can be NULL */
+    cminitstate_function    cminitstate;    /* can be NULL */
+    cmcompress_function     cmcompress;
+    cmcompress_function     cmdecompress;
+} CompressionAmRoutine;
+</programlisting>
+  </para>
+ </sect1>
+ <sect1 id="compression-am-functions">
+  <title>Compression Access Method Functions</title>
+
+  <para>
+   The compression and auxiliary functions that a compression access
+   method must provide in <structname>CompressionAmRoutine</structname> are:
+  </para>
+
+  <para>
+<programlisting>
+void
+cmcheck (Form_pg_attribute att, List *options);
+</programlisting>
+   Called when an attribute is linked with a compression access method. Can
+   be used to check compatibility with the attribute and to perform any
+   additional checks.
+  </para>
+
+  <para>
+  Compression functions take a special struct,
+  <structname>CompressionAmOptions</structname>, as their first
+  parameter. This struct contains per-backend cached state for each
+  attribute compression record. <structname>CompressionAmOptions</structname> is defined thus:
+
+<programlisting>
+typedef struct CompressionAmOptions
+{
+    Oid         acoid;          /* Oid of attribute compression */
+    Oid         amoid;          /* Oid of compression access method */
+    List       *acoptions;      /* Parsed options, used for comparison */
+    CompressionAmRoutine *amroutine;    /* compression access method routine */
+
+    /* result of cminitstate function will be put here */
+    void       *acstate;
+} CompressionAmOptions;
+</programlisting>
+  </para>
+
+  <para>
+  The <structfield>acstate</structfield> field is used to keep temporary state
+  between calls to the compression functions and stores the result of the
+  <structfield>cminitstate</structfield> function. It can be useful for storing
+  a parsed representation of the compression options.
+  </para>
+
+  <para>
+  Note that any invalidation of the <structname>pg_attr_compression</structname> relation
+  will cause all cached <structfield>acstate</structfield> values to be cleared.
+  They will be recreated on the next call to a compression function.
+  </para>
+
+  <para>
+<programlisting>
+void *
+cminitstate (Oid acoid, List *options);
+</programlisting>
+  Called when <structname>CompressionAmOptions</structname> is being
+  initialized. Can return a pointer to memory that will be passed between
+  calls to the compression functions.
+  </para>
+
+  <para>
+<programlisting>
+struct varlena *
+cmcompress (CompressionAmOptions *cmoptions,
+            const struct varlena *value);
+</programlisting>
+   This function is used to compress a varlena value. It can return NULL if the
+   data is incompressible. If it returns a varlena bigger than the original, the
+   core code will not use it.
+  </para>
+
+  <para>
+<programlisting>
+struct varlena *
+cmdecompress (CompressionAmOptions *cmoptions,
+              const struct varlena *value);
+</programlisting>
+   This function is used to decompress a varlena value.
+  </para>
+
+ </sect1>
+</chapter>
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index a03ea1427b..8f482a60dd 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -90,6 +90,7 @@
 <!ENTITY brin       SYSTEM "brin.sgml">
 <!ENTITY planstats    SYSTEM "planstats.sgml">
 <!ENTITY indexam    SYSTEM "indexam.sgml">
+<!ENTITY compression-am SYSTEM "compression-am.sgml">
 <!ENTITY nls        SYSTEM "nls.sgml">
 <!ENTITY plhandler  SYSTEM "plhandler.sgml">
 <!ENTITY fdwhandler SYSTEM "fdwhandler.sgml">
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 05102724ea..54110050d1 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -47,7 +47,7 @@
   <title>Basic API Structure for Indexes</title>
 
   <para>
-   Each index access method is described by a row in the
+   Each index access method is described by a row with INDEX type in the
    <link linkend="catalog-pg-am"><structname>pg_am</structname></link>
    system catalog.  The <structname>pg_am</structname> entry
    specifies a name and a <firstterm>handler function</firstterm> for the access
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 96d196d229..17f2cc98b3 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -251,6 +251,7 @@
   &custom-scan;
   &geqo;
   &indexam;
+  &compression-am;
   &generic-wal;
   &btree;
   &gist;
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index e360728c02..09ad7c8aa2 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -53,6 +53,7 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="parameter">name</replaceable>
     ALTER [ COLUMN ] <replaceable class="parameter">column_name</replaceable> SET ( <replaceable class="parameter">attribute_option</replaceable> = <replaceable class="parameter">value</replaceable> [, ... ] )
     ALTER [ COLUMN ] <replaceable class="parameter">column_name</replaceable> RESET ( <replaceable class="parameter">attribute_option</replaceable> [, ... ] )
     ALTER [ COLUMN ] <replaceable class="parameter">column_name</replaceable> SET STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN }
+    ALTER [ COLUMN ] <replaceable class="parameter">column_name</replaceable> SET COMPRESSION <replaceable class="parameter">compression_am</replaceable> [ WITH (<replaceable class="parameter">compression_am_options</replaceable>) ] [ PRESERVE (<replaceable class="parameter">compression_preserve_list</replaceable>) ]
     ADD <replaceable class="parameter">table_constraint</replaceable> [ NOT VALID ]
     ADD <replaceable class="parameter">table_constraint_using_index</replaceable>
     ALTER CONSTRAINT <replaceable class="parameter">constraint_name</replaceable> [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]
@@ -359,6 +360,23 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term>
+     <literal>SET COMPRESSION <replaceable class="parameter">compression_am</replaceable> [ WITH (<replaceable class="parameter">compression_am_options</replaceable>) ] [ PRESERVE (<replaceable class="parameter">compression_preserve_list</replaceable>) ]</literal>
+    </term>
+    <listitem>
+     <para>
+      This form adds compression to a column. The compression access method
+      must already exist; see <xref linkend="sql-create-access-method"/>. If the
+      compression method has options, they can be specified with the
+      <literal>WITH</literal> clause. The <literal>PRESERVE</literal> list names
+      the compression access methods already used on the column that should be
+      kept. Without <literal>PRESERVE</literal>, or with only a partial list of
+      those methods, the table will be rewritten.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>ADD <replaceable class="parameter">table_constraint</replaceable> [ NOT VALID ]</literal></term>
     <listitem>
diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml
index 851c5e63be..a35005aca3 100644
--- a/doc/src/sgml/ref/create_access_method.sgml
+++ b/doc/src/sgml/ref/create_access_method.sgml
@@ -61,7 +61,7 @@ CREATE ACCESS METHOD <replaceable class="parameter">name</replaceable>
     <listitem>
      <para>
       This clause specifies the type of access method to define.
-      Only <literal>INDEX</literal> is supported at present.
+      <literal>INDEX</literal> and <literal>COMPRESSION</literal> types are supported at present.
      </para>
     </listitem>
    </varlistentry>
@@ -79,6 +79,9 @@ CREATE ACCESS METHOD <replaceable class="parameter">name</replaceable>
       be <type>index_am_handler</type>.  The C-level API that the handler
       function must implement varies depending on the type of access method.
       The index access method API is described in <xref linkend="indexam"/>.
+      For <literal>COMPRESSION</literal> access methods, the type must be
+      <type>compression_am_handler</type>. The compression access method API
+      is described in <xref linkend="compression-am"/>.
      </para>
     </listitem>
    </varlistentry>
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index e94fe2c3b6..2d192148de 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -65,6 +65,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
   GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( <replaceable>sequence_options</replaceable> ) ] |
   UNIQUE <replaceable class="parameter">index_parameters</replaceable> |
   PRIMARY KEY <replaceable class="parameter">index_parameters</replaceable> |
+  COMPRESSION <replaceable class="parameter">compression_access_method</replaceable> [ WITH (<replaceable class="parameter">compression_am_options</replaceable>) ] |
   REFERENCES <replaceable class="parameter">reftable</replaceable> [ ( <replaceable class="parameter">refcolumn</replaceable> ) ] [ MATCH FULL | MATCH PARTIAL | MATCH SIMPLE ]
     [ ON DELETE <replaceable class="parameter">referential_action</replaceable> ] [ ON UPDATE <replaceable class="parameter">referential_action</replaceable> ] }
 [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]
@@ -920,6 +921,18 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>COMPRESSION <replaceable class="parameter">compression_access_method</replaceable> [ WITH (<replaceable class="parameter">compression_am_options</replaceable>) ]</literal></term>
+    <listitem>
+     <para>
+      This clause adds compression to a column. A compression method can be
+      created with <xref linkend="sql-create-access-method"/>. If the compression
+      method has options, they can be specified with the <literal>WITH</literal>
+      clause.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry id="sql-createtable-exclude">
     <term><literal>EXCLUDE [ USING <replaceable class="parameter">index_method</replaceable> ] ( <replaceable class="parameter">exclude_element</replaceable> WITH <replaceable class="parameter">operator</replaceable> [, ... ] ) <replaceable class="parameter">index_parameters</replaceable> [ WHERE ( <replaceable class="parameter">predicate</replaceable> ) ]</literal></term>
     <listitem>
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index cbdad0c3fb..89a5889d0f 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -385,10 +385,12 @@ Further details appear in <xref linkend="storage-toast-inmemory"/>.
 </para>
 
 <para>
-The compression technique used for either in-line or out-of-line compressed
+The default compression technique used for either in-line or out-of-line compressed
 data is a fairly simple and very fast member
 of the LZ family of compression techniques.  See
-<filename>src/common/pg_lzcompress.c</filename> for the details.
+<filename>src/common/pg_lzcompress.c</filename> for the details. Custom
+compression methods can also be used; see <xref linkend="compression-am"/> for
+more information.
 </para>
 
 <sect2 id="storage-toast-ondisk">
-- 
2.21.0

