From 1c251ff129f8e9f33d14df547d97ba549b109648 Mon Sep 17 00:00:00 2001
From: Nikita Glukhov <n.gluhov@postgrespro.ru>
Date: Tue, 5 Dec 2017 02:38:50 +0300
Subject: [PATCH 1/2] spgist-compress-method-8

---
 doc/src/sgml/spgist.sgml                | 54 ++++++++++++++++++++++++++-------
 src/backend/access/spgist/spgdoinsert.c | 44 +++++++++++++++++++--------
 src/backend/access/spgist/spgutils.c    | 23 ++++++++++++--
 src/backend/access/spgist/spgvalidate.c | 24 ++++++++++++++-
 src/include/access/spgist.h             |  5 ++-
 src/include/access/spgist_private.h     |  8 +++--
 6 files changed, 127 insertions(+), 31 deletions(-)

diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index 139c8ed..55c1b06 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -240,20 +240,21 @@
 
  <para>
   There are five user-defined methods that an index operator class for
-  <acronym>SP-GiST</acronym> must provide.  All five follow the convention
-  of accepting two <type>internal</type> arguments, the first of which is a
-  pointer to a C struct containing input values for the support method,
-  while the second argument is a pointer to a C struct where output values
-  must be placed.  Four of the methods just return <type>void</type>, since
-  all their results appear in the output struct; but
+  <acronym>SP-GiST</acronym> must provide, and one that is optional.  All five
+  mandatory methods follow the convention of accepting two <type>internal</type>
+  arguments, the first of which is a pointer to a C struct containing input
+  values for the support method, while the second argument is a pointer to a
+  C struct where output values must be placed.  Four of the methods just return
+  <type>void</type>, since all their results appear in the output struct; but
   <function>leaf_consistent</function> additionally returns a <type>boolean</type> result.
   The methods must not modify any fields of their input structs.  In all
   cases, the output struct is initialized to zeroes before calling the
-  user-defined method.
+  user-defined method.  The optional <function>compress</function> method accepts
+  a datum to be indexed and returns the value that will actually be indexed.
  </para>
 
  <para>
-  The five user-defined methods are:
+  The five mandatory user-defined methods are:
  </para>
 
  <variablelist>
@@ -283,6 +284,7 @@ typedef struct spgConfigOut
 {
     Oid         prefixType;     /* Data type of inner-tuple prefixes */
     Oid         labelType;      /* Data type of inner-tuple node labels */
+    Oid         leafType;       /* Data type of leaf */
     bool        canReturnData;  /* Opclass can reconstruct original data */
     bool        longValuesOK;   /* Opclass can cope with values &gt; 1 page */
 } spgConfigOut;
@@ -303,7 +305,15 @@ typedef struct spgConfigOut
       <structfield>longValuesOK</structfield> should be set true only when the
       <structfield>attType</structfield> is of variable length and the operator
       class is capable of segmenting long values by repeated suffixing
-      (see <xref linkend="spgist-limits"/>).
+      (see <xref linkend="spgist-limits"/>).  <structfield>leafType</structfield>
+      usually has the same value as <structfield>attType</structfield>, but if
+      it is different, the optional <function>compress</function> method
+      must be provided.  The <function>compress</function> method is responsible
+      for transforming values from <structfield>attType</structfield> to
+      <structfield>leafType</structfield>.  In this case, all other functions
+      should accept <structfield>leafType</structfield> values.  Note: both
+      consistent functions will get <structfield>scankeys</structfield>
+      unchanged, without the <function>compress</function> transformation.
      </para>
      </listitem>
     </varlistentry>
@@ -624,7 +634,8 @@ typedef struct spgInnerConsistentOut
        <structfield>reconstructedValue</structfield> is the value reconstructed for the
        parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
        <function>inner_consistent</function> function did not provide a value at the
-       parent level.
+       parent level.  <structfield>reconstructedValue</structfield> should always be of
+       type <structname>spgConfigOut</structname>.<structfield>leafType</structfield>.
        <structfield>traversalValue</structfield> is a pointer to any traverse data
        passed down from the previous call of <function>inner_consistent</function>
        on the parent index tuple, or NULL at the root level.
@@ -730,7 +741,8 @@ typedef struct spgLeafConsistentOut
        <structfield>reconstructedValue</structfield> is the value reconstructed for the
        parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
        <function>inner_consistent</function> function did not provide a value at the
-       parent level.
+       parent level.  <structfield>reconstructedValue</structfield> should always be of
+       type <structname>spgConfigOut</structname>.<structfield>leafType</structfield>.
        <structfield>traversalValue</structfield> is a pointer to any traverse data
        passed down from the previous call of <function>inner_consistent</function>
        on the parent index tuple, or NULL at the root level.
@@ -757,6 +769,26 @@ typedef struct spgLeafConsistentOut
     </varlistentry>
    </variablelist>
 
+ <para>
+  The optional user-defined method is:
+ </para>
+
+ <variablelist>
+    <varlistentry>
+     <term><function>Datum compress(Datum in)</function></term>
+     <listitem>
+      <para>
+       Converts the data item into a format suitable for physical storage in
+       an index page.  It accepts a value of type
+       <structname>spgConfigIn</structname>.<structfield>attType</structfield>
+       and returns a value of type
+       <structname>spgConfigOut</structname>.<structfield>leafType</structfield>.
+       The output value should not be toasted.
+      </para>
+     </listitem>
+    </varlistentry>
+  </variablelist>
+
   <para>
    All the SP-GiST support methods are normally called in a short-lived
    memory context; that is, <varname>CurrentMemoryContext</varname> will be reset
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index a5f4c40..edf86f1 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -1899,21 +1899,41 @@ spgdoinsert(Relation index, SpGistState *state,
 	FmgrInfo   *procinfo = NULL;
 
 	/*
-	 * Look up FmgrInfo of the user-defined choose function once, to save
-	 * cycles in the loop below.
+	 * Prepare the leaf datum to insert.
+	 *
+	 * If there is an optional "compress" method, call it to form the leaf
+	 * datum from the input datum. Otherwise we will store the input datum as
+	 * is. (We have to detoast it, though. We assume the "compress" method to
+	 * return an untoasted value.)
 	 */
 	if (!isnull)
-		procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
+	{
+		if (OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+		{
+			procinfo = index_getprocinfo(index, 1, SPGIST_COMPRESS_PROC);
+			leafDatum = FunctionCall1Coll(procinfo,
+										  index->rd_indcollation[0],
+										  datum);
+		}
+		else
+		{
+			Assert(state->attLeafType.type == state->attType.type);
+
+			if (state->attType.attlen == -1)
+				leafDatum = PointerGetDatum(PG_DETOAST_DATUM(datum));
+			else
+				leafDatum = datum;
+		}
+	}
+	else
+		leafDatum = (Datum) 0;
 
 	/*
-	 * Since we don't use index_form_tuple in this AM, we have to make sure
-	 * value to be inserted is not toasted; FormIndexDatum doesn't guarantee
-	 * that.
+	 * Look up FmgrInfo of the user-defined choose function once, to save
+	 * cycles in the loop below.
 	 */
-	if (!isnull && state->attType.attlen == -1)
-		datum = PointerGetDatum(PG_DETOAST_DATUM(datum));
-
-	leafDatum = datum;
+	if (!isnull)
+		procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
 
 	/*
 	 * Compute space needed for a leaf tuple containing the given datum.
@@ -1923,7 +1943,7 @@ spgdoinsert(Relation index, SpGistState *state,
 	 */
 	if (!isnull)
 		leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-			SpGistGetTypeSize(&state->attType, leafDatum);
+			SpGistGetTypeSize(&state->attLeafType, leafDatum);
 	else
 		leafSize = SGDTSIZE + sizeof(ItemIdData);
 
@@ -2138,7 +2158,7 @@ spgdoinsert(Relation index, SpGistState *state,
 					{
 						leafDatum = out.result.matchNode.restDatum;
 						leafSize = SGLTHDRSZ + sizeof(ItemIdData) +
-							SpGistGetTypeSize(&state->attType, leafDatum);
+							SpGistGetTypeSize(&state->attLeafType, leafDatum);
 					}
 
 					/*
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index bd5301f..668e3c4 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -124,7 +124,23 @@ spgGetCache(Relation index)
 						  PointerGetDatum(&cache->config));
 
 		/* Get the information we need about each relevant datatype */
-		fillTypeDesc(&cache->attType, atttype);
+		if (OidIsValid(cache->config.leafType) &&
+			cache->config.leafType != atttype)
+		{
+			if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("compress method must be defined when leaf type is different from input type")));
+
+			fillTypeDesc(&cache->attType, atttype);
+			fillTypeDesc(&cache->attLeafType, cache->config.leafType);
+		}
+		else
+		{
+			fillTypeDesc(&cache->attType, atttype);
+			cache->attLeafType = cache->attType;
+		}
+
 		fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
 		fillTypeDesc(&cache->attLabelType, cache->config.labelType);
 
@@ -164,6 +180,7 @@ initSpGistState(SpGistState *state, Relation index)
 
 	state->config = cache->config;
 	state->attType = cache->attType;
+	state->attLeafType = cache->attLeafType;
 	state->attPrefixType = cache->attPrefixType;
 	state->attLabelType = cache->attLabelType;
 
@@ -618,7 +635,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
 	/* compute space needed (note result is already maxaligned) */
 	size = SGLTHDRSZ;
 	if (!isnull)
-		size += SpGistGetTypeSize(&state->attType, datum);
+		size += SpGistGetTypeSize(&state->attLeafType, datum);
 
 	/*
 	 * Ensure that we can replace the tuple with a dead tuple later.  This
@@ -634,7 +651,7 @@ spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
 	tup->nextOffset = InvalidOffsetNumber;
 	tup->heapPtr = *heapPtr;
 	if (!isnull)
-		memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
+		memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
 
 	return tup;
 }
diff --git a/src/backend/access/spgist/spgvalidate.c b/src/backend/access/spgist/spgvalidate.c
index 157cf2a..514da47 100644
--- a/src/backend/access/spgist/spgvalidate.c
+++ b/src/backend/access/spgist/spgvalidate.c
@@ -52,6 +52,10 @@ spgvalidate(Oid opclassoid)
 	OpFamilyOpFuncGroup *opclassgroup;
 	int			i;
 	ListCell   *lc;
+	spgConfigIn	configIn;
+	spgConfigOut configOut;
+	Oid			configOutLefttype = InvalidOid;
+	Oid			configOutRighttype = InvalidOid;
 
 	/* Fetch opclass information */
 	classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
@@ -100,6 +104,15 @@ spgvalidate(Oid opclassoid)
 		switch (procform->amprocnum)
 		{
 			case SPGIST_CONFIG_PROC:
+				ok = check_amproc_signature(procform->amproc, VOIDOID, true,
+											2, 2, INTERNALOID, INTERNALOID);
+				configIn.attType = procform->amproclefttype;
+				memset(&configOut, 0, sizeof(configOut));	/* zero outputs */
+				OidFunctionCall2(procform->amproc, PointerGetDatum(&configIn),
+								 PointerGetDatum(&configOut));
+				configOutLefttype = procform->amproclefttype;
+				configOutRighttype = procform->amprocrighttype;
+				break;
 			case SPGIST_CHOOSE_PROC:
 			case SPGIST_PICKSPLIT_PROC:
 			case SPGIST_INNER_CONSISTENT_PROC:
@@ -110,6 +123,15 @@ spgvalidate(Oid opclassoid)
 				ok = check_amproc_signature(procform->amproc, BOOLOID, true,
 											2, 2, INTERNALOID, INTERNALOID);
 				break;
+			case SPGIST_COMPRESS_PROC:
+				if (configOutLefttype != procform->amproclefttype ||
+					configOutRighttype != procform->amprocrighttype)
+					ok = false;
+				else
+					ok = check_amproc_signature(procform->amproc,
+												configOut.leafType, true,
+												1, 1, procform->amproclefttype);
+				break;
 			default:
 				ereport(INFO,
 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -212,7 +234,7 @@ spgvalidate(Oid opclassoid)
 		if (thisgroup->lefttype != thisgroup->righttype)
 			continue;
 
-		for (i = 1; i <= SPGISTNProc; i++)
+		for (i = 1; i <= SPGISTNRequiredProc; i++)
 		{
 			if ((thisgroup->functionset & (((uint64) 1) << i)) != 0)
 				continue;		/* got it */
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index d1bc396..a477278 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -30,7 +30,9 @@
 #define SPGIST_PICKSPLIT_PROC			3
 #define SPGIST_INNER_CONSISTENT_PROC	4
 #define SPGIST_LEAF_CONSISTENT_PROC		5
-#define SPGISTNProc						5
+#define SPGIST_COMPRESS_PROC			6
+#define SPGISTNRequiredProc				5
+#define SPGISTNProc						6
 
 /*
  * Argument structs for spg_config method
@@ -44,6 +46,7 @@ typedef struct spgConfigOut
 {
 	Oid			prefixType;		/* Data type of inner-tuple prefixes */
 	Oid			labelType;		/* Data type of inner-tuple node labels */
+	Oid			leafType;		/* Data type of leaf (type of SPGIST_COMPRESS_PROC output) */
 	bool		canReturnData;	/* Opclass can reconstruct original data */
 	bool		longValuesOK;	/* Opclass can cope with values > 1 page */
 } spgConfigOut;
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 1c4b321..69dc2ba 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -119,7 +119,8 @@ typedef struct SpGistState
 {
 	spgConfigOut config;		/* filled in by opclass config method */
 
-	SpGistTypeDesc attType;		/* type of input data and leaf values */
+	SpGistTypeDesc attType;		/* type of values to be indexed/restored */
+	SpGistTypeDesc attLeafType;		/* type of leaf values */
 	SpGistTypeDesc attPrefixType;	/* type of inner-tuple prefix values */
 	SpGistTypeDesc attLabelType;	/* type of node label values */
 
@@ -178,7 +179,8 @@ typedef struct SpGistCache
 {
 	spgConfigOut config;		/* filled in by opclass config method */
 
-	SpGistTypeDesc attType;		/* type of input data and leaf values */
+	SpGistTypeDesc attType;		/* type of values to be indexed/restored */
+	SpGistTypeDesc attLeafType;		/* type of leaf values */
 	SpGistTypeDesc attPrefixType;	/* type of inner-tuple prefix values */
 	SpGistTypeDesc attLabelType;	/* type of node label values */
 
@@ -300,7 +302,7 @@ typedef SpGistLeafTupleData *SpGistLeafTuple;
 
 #define SGLTHDRSZ			MAXALIGN(sizeof(SpGistLeafTupleData))
 #define SGLTDATAPTR(x)		(((char *) (x)) + SGLTHDRSZ)
-#define SGLTDATUM(x, s)		((s)->attType.attbyval ? \
+#define SGLTDATUM(x, s)		((s)->attLeafType.attbyval ? \
 							 *(Datum *) SGLTDATAPTR(x) : \
 							 PointerGetDatum(SGLTDATAPTR(x)))
 
-- 
2.7.4

