From 16f010e5853ee0cc9baf4e44753eda04c6fabe32 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 2 Jul 2026 16:51:35 -0400
Subject: [PATCH v3 1/5] Reverse-engineer some documentation for btree_gist's
 varlena modules.

There were a number of rather subtle points about the behavior of
this code, which its original authors did not deign to document.
Try to improve that.  In particular, explain how internal and leaf
keys can differ and what the restrictions are on that.

This work arose from trying to fix some bugs, and in the process
I believe I've identified some more, but this patch does not attempt
to fix anything, only document it.  I did make a few purely cosmetic
code changes, such as removing dead (and confusing!) initializations
of variables and choosing more appropriate types for some pointers.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/AH*AvQCYKhQGVvPWi1GiU4oY.8.1781609375063.Hmail.3020001251@tju.edu.cn
---
 contrib/btree_gist/btree_bit.c       | 51 ++++++++++++++++++++------
 contrib/btree_gist/btree_bytea.c     |  4 +-
 contrib/btree_gist/btree_numeric.c   |  9 ++++-
 contrib/btree_gist/btree_text.c      | 12 +++++-
 contrib/btree_gist/btree_utils_var.c | 55 ++++++++++++++++++++--------
 contrib/btree_gist/btree_utils_var.h | 38 ++++++++++++++++++-
 6 files changed, 136 insertions(+), 33 deletions(-)

diff --git a/contrib/btree_gist/btree_bit.c b/contrib/btree_gist/btree_bit.c
index 2b9c18a586f..dcfe6ed17e4 100644
--- a/contrib/btree_gist/btree_bit.c
+++ b/contrib/btree_gist/btree_bit.c
@@ -1,5 +1,11 @@
 /*
  * contrib/btree_gist/btree_bit.c
+ *
+ * Support for bit and varbit types (which act the same for our purposes).
+ *
+ * Leaf-page keys are bit/varbit values, but internal-page keys are just
+ * bytea values containing the first N bytes of the represented bitstring.
+ * (In particular, they lack the bit_len field of a bit/varbit Datum.)
  */
 #include "postgres.h"
 
@@ -63,6 +69,10 @@ gbt_bitlt(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 											PointerGetDatum(b)));
 }
 
+/*
+ * Notice we use byteacmp here, not bitcmp as you might expect.
+ * That's because internal-page keys are bytea.
+ */
 static int32
 gbt_bitcmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 {
@@ -72,10 +82,25 @@ gbt_bitcmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 }
 
 
+/*
+ * Convert a leaf-page bit/varbit value to internal-page form.
+ *
+ * The important change here is to remove the bit_len field so that
+ * what we have left looks like a bytea.
+ *
+ * The business with padding to INTALIGN length appears entirely historical,
+ * but we can't remove that without breaking on-disk compatibility.
+ * For example, if the lower bound of some leaf page is the 20-bit string
+ * of all ones, with data contents FFFFF0, this code pads it to bytea FFFFF000
+ * in the internal key representing that page.  If, when we search the index
+ * for that value, we did not again pad to FFFFF000, then byteacmp would
+ * say that the query is strictly less than the lower bound so we would not
+ * descend to that leaf page.
+ */
 static bytea *
-gbt_bit_xfrm(bytea *leaf)
+gbt_bit_xfrm(VarBit *leaf)
 {
-	bytea	   *out = leaf;
+	bytea	   *out;
 	int			sz = VARBITBYTES(leaf) + VARHDRSZ;
 	int			padded_sz = INTALIGN(sz);
 
@@ -88,17 +113,19 @@ gbt_bit_xfrm(bytea *leaf)
 	return out;
 }
 
-
-
-
+/*
+ * Convert a GBT_VARKEY representation of a leaf key to a palloc'd
+ * GBT_VARKEY representation of an internal key.
+ * We assume lower == upper since it's a leaf key.
+ */
 static GBT_VARKEY *
 gbt_bit_l2n(GBT_VARKEY *leaf, FmgrInfo *flinfo)
 {
-	GBT_VARKEY *out = leaf;
+	GBT_VARKEY *out;
 	GBT_VARKEY_R r = gbt_var_key_readable(leaf);
 	bytea	   *o;
 
-	o = gbt_bit_xfrm(r.lower);
+	o = gbt_bit_xfrm((VarBit *) r.lower);
 	r.upper = r.lower = o;
 	out = gbt_var_key_copy(&r);
 	pfree(o);
@@ -110,14 +137,14 @@ static const gbtree_vinfo tinfo =
 {
 	gbt_t_bit,
 	0,
-	true,
+	true,						/* internal keys can be truncated */
 	gbt_bitgt,
 	gbt_bitge,
 	gbt_biteq,
 	gbt_bitle,
 	gbt_bitlt,
 	gbt_bitcmp,
-	gbt_bit_l2n
+	gbt_bit_l2n					/* leaf to internal transformation */
 };
 
 
@@ -137,7 +164,7 @@ Datum
 gbt_bit_consistent(PG_FUNCTION_ARGS)
 {
 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
-	void	   *query = DatumGetByteaP(PG_GETARG_DATUM(1));
+	VarBit	   *query = PG_GETARG_VARBIT_P(1);
 	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 #ifdef NOT_USED
 	Oid			subtype = PG_GETARG_OID(3);
@@ -155,7 +182,8 @@ gbt_bit_consistent(PG_FUNCTION_ARGS)
 									true, &tinfo, fcinfo->flinfo);
 	else
 	{
-		bytea	   *q = gbt_bit_xfrm((bytea *) query);
+		/* Must convert to internal form to compare to internal-page entries */
+		bytea	   *q = gbt_bit_xfrm(query);
 
 		retval = gbt_var_consistent(&r, q, strategy, PG_GET_COLLATION(),
 									false, &tinfo, fcinfo->flinfo);
@@ -217,6 +245,7 @@ gbt_bit_ssup_cmp(Datum x, Datum y, SortSupport ssup)
 	Datum		result;
 
 	/* for leaf items we expect lower == upper, so only compare lower */
+	/* XXX shouldn't this use bitcmp() ? */
 	result = DirectFunctionCall2(byteacmp,
 								 PointerGetDatum(arg1.lower),
 								 PointerGetDatum(arg2.lower));
diff --git a/contrib/btree_gist/btree_bytea.c b/contrib/btree_gist/btree_bytea.c
index 50bb24308e9..8411598a99b 100644
--- a/contrib/btree_gist/btree_bytea.c
+++ b/contrib/btree_gist/btree_bytea.c
@@ -1,5 +1,7 @@
 /*
  * contrib/btree_gist/btree_bytea.c
+ *
+ * Support for bytea data type.
  */
 #include "postgres.h"
 
@@ -72,7 +74,7 @@ static const gbtree_vinfo tinfo =
 {
 	gbt_t_bytea,
 	0,
-	true,
+	true,						/* internal keys can be truncated */
 	gbt_byteagt,
 	gbt_byteage,
 	gbt_byteaeq,
diff --git a/contrib/btree_gist/btree_numeric.c b/contrib/btree_gist/btree_numeric.c
index dba04c3a1b3..8a506865b17 100644
--- a/contrib/btree_gist/btree_numeric.c
+++ b/contrib/btree_gist/btree_numeric.c
@@ -1,5 +1,7 @@
 /*
  * contrib/btree_gist/btree_numeric.c
+ *
+ * Support for numeric data type.
  */
 #include "postgres.h"
 
@@ -73,11 +75,16 @@ gbt_numeric_cmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 }
 
 
+/*
+ * We could conceivably support internal-key truncation here, but it would
+ * require custom truncation code, and most values wouldn't be long enough
+ * to make it worthwhile.
+ */
 static const gbtree_vinfo tinfo =
 {
 	gbt_t_numeric,
 	0,
-	false,
+	false,						/* no truncation permitted */
 	gbt_numeric_gt,
 	gbt_numeric_ge,
 	gbt_numeric_eq,
diff --git a/contrib/btree_gist/btree_text.c b/contrib/btree_gist/btree_text.c
index 2ac12f1cab2..e7269ba3e7c 100644
--- a/contrib/btree_gist/btree_text.c
+++ b/contrib/btree_gist/btree_text.c
@@ -1,5 +1,7 @@
 /*
  * contrib/btree_gist/btree_text.c
+ *
+ * Support for text and bpchar types.
  */
 #include "postgres.h"
 
@@ -78,11 +80,17 @@ gbt_textcmp(const void *a, const void *b, Oid collation, FmgrInfo *flinfo)
 												 PointerGetDatum(b)));
 }
 
+/*
+ * Originally this module permitted truncation of internal keys, but that
+ * tends to result in wrong comparison answers if the collation is any
+ * more complicated than C.  The prefix-match hack used for simpler types
+ * can't fix it, either.
+ */
 static gbtree_vinfo tinfo =
 {
 	gbt_t_text,
 	0,
-	false,
+	false,						/* no truncation permitted */
 	gbt_textgt,
 	gbt_textge,
 	gbt_texteq,
@@ -152,7 +160,7 @@ static gbtree_vinfo bptinfo =
 {
 	gbt_t_bpchar,
 	0,
-	false,
+	false,						/* as above, no truncation permitted */
 	gbt_bpchargt,
 	gbt_bpcharge,
 	gbt_bpchareq,
diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c
index 25c3bbe8eac..bceadf5ae60 100644
--- a/contrib/btree_gist/btree_utils_var.c
+++ b/contrib/btree_gist/btree_utils_var.c
@@ -1,5 +1,7 @@
 /*
  * contrib/btree_gist/btree_utils_var.c
+ *
+ * Common routines for btree_gist code working with varlena indexed data types.
  */
 #include "postgres.h"
 
@@ -37,6 +39,7 @@ gbt_var_decompress(PG_FUNCTION_ARGS)
 	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 	GBT_VARKEY *key = (GBT_VARKEY *) PG_DETOAST_DATUM(entry->key);
 
+	/* We only need a new GISTENTRY if detoasting did something */
 	if (key != (GBT_VARKEY *) DatumGetPointer(entry->key))
 	{
 		GISTENTRY  *retval = palloc_object(GISTENTRY);
@@ -51,7 +54,10 @@ gbt_var_decompress(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(entry);
 }
 
-/* Returns a better readable representation of variable key ( sets pointer ) */
+/*
+ * Extract a "readable" representation of a GBT_VARKEY, containing direct
+ * pointers to the contained lower and upper datums.
+ */
 GBT_VARKEY_R
 gbt_var_key_readable(const GBT_VARKEY *k)
 {
@@ -83,7 +89,10 @@ gbt_var_key_from_datum(const varlena *u)
 }
 
 /*
- * Create an entry to store in the index, from lower and upper bound.
+ * Create a key entry to store in the index, from lower and upper bound.
+ *
+ * This code assumes that none of the types we work with require more
+ * than INTALIGN alignment.
  */
 GBT_VARKEY *
 gbt_var_key_copy(const GBT_VARKEY_R *u)
@@ -100,16 +109,17 @@ gbt_var_key_copy(const GBT_VARKEY_R *u)
 	return r;
 }
 
-
+/*
+ * Convert a GBT_VARKEY in leaf form to a GBT_VARKEY in internal form.
+ * No-op if the data type doesn't require a transformation.
+ */
 static GBT_VARKEY *
 gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
 {
-	GBT_VARKEY *out = leaf;
-
 	if (tinfo->f_l2n)
-		out = tinfo->f_l2n(leaf, flinfo);
-
-	return out;
+		return tinfo->f_l2n(leaf, flinfo);
+	else
+		return leaf;
 }
 
 
@@ -173,7 +183,10 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
 
 
 /*
- * returns true, if query matches prefix ( common prefix )
+ * returns true if query matches prefix up to the length of the prefix
+ *
+ * We need this to avoid edge-case problems when the "prefix" is a truncated
+ * datum; see discussion in btree_utils_var.h.
  */
 static bool
 gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinfo)
@@ -195,7 +208,14 @@ gbt_bytea_pf_match(const bytea *pf, const bytea *query, const gbtree_vinfo *tinf
 
 
 /*
- * returns true, if query matches node using common prefix
+ * returns true if query matches node according to common-prefix rule
+ *
+ * If the data type is truncatable, then a shortened upper bound must be
+ * considered to include all values that match it up to its own length,
+ * even though longer values would normally be considered larger.
+ *
+ * XXX isn't the check against node->lower useless?
+ * A truncated lower bound would already be less than all included values.
  */
 static bool
 gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree_vinfo *tinfo)
@@ -207,13 +227,16 @@ gbt_var_node_pf_match(const GBT_VARKEY_R *node, const bytea *query, const gbtree
 
 
 /*
- *  truncates / compresses the node key
- *  cpf_length .. common prefix length
+ * truncates / compresses the node key
+ *
+ * cpf_length is the common prefix length of the lower and upper values.
+ * We truncate to that plus one byte, so that the node represents a range
+ * of leaf values but doesn't have undue specificity.
  */
 static GBT_VARKEY *
 gbt_var_node_truncate(const GBT_VARKEY *node, int32 cpf_length, const gbtree_vinfo *tinfo)
 {
-	GBT_VARKEY *out = NULL;
+	GBT_VARKEY *out;
 	GBT_VARKEY_R r = gbt_var_key_readable(node);
 	int32		len1 = VARSIZE(r.lower) - VARHDRSZ;
 	int32		len2 = VARSIZE(r.upper) - VARHDRSZ;
@@ -246,7 +269,7 @@ gbt_var_bin_union(Datum *u, GBT_VARKEY *e, Oid collation,
 	GBT_VARKEY_R eo = gbt_var_key_readable(e);
 	GBT_VARKEY_R nr;
 
-	if (eo.lower == eo.upper)	/* leaf */
+	if (eo.lower == eo.upper)	/* if leaf, convert to internal form */
 	{
 		GBT_VARKEY *tmp;
 
@@ -355,7 +378,7 @@ gbt_var_union(const GistEntryVector *entryvec, int32 *size, Oid collation,
 	if (tinfo->trnc)
 	{
 		int32		plen;
-		GBT_VARKEY *trc = NULL;
+		GBT_VARKEY *trc;
 
 		plen = gbt_var_node_cp_len((GBT_VARKEY *) DatumGetPointer(out), tinfo);
 		trc = gbt_var_node_truncate((GBT_VARKEY *) DatumGetPointer(out), plen + 1, tinfo);
@@ -396,7 +419,7 @@ gbt_var_penalty(float *res, const GISTENTRY *o, const GISTENTRY *n,
 	*res = 0.0;
 
 	nk = gbt_var_key_readable(newe);
-	if (nk.lower == nk.upper)	/* leaf */
+	if (nk.lower == nk.upper)	/* if leaf, convert to internal form */
 	{
 		GBT_VARKEY *tmp;
 
diff --git a/contrib/btree_gist/btree_utils_var.h b/contrib/btree_gist/btree_utils_var.h
index f28a5d98ae3..c0ac77cb9ec 100644
--- a/contrib/btree_gist/btree_utils_var.h
+++ b/contrib/btree_gist/btree_utils_var.h
@@ -1,5 +1,7 @@
 /*
  * contrib/btree_gist/btree_utils_var.h
+ *
+ * Declarations for btree_gist code working with varlena indexed data types.
  */
 #ifndef __BTREE_UTILS_VAR_H__
 #define __BTREE_UTILS_VAR_H__
@@ -7,10 +9,21 @@
 #include "access/gist.h"
 #include "btree_gist.h"
 
-/* Variable length key */
+/*
+ * An internal index key (also called a node in some places) includes a
+ * lower and an upper bound, both of which are varlena datums, packed into
+ * a wrapper varlena datum.  We also work with leaf keys, which contain a
+ * single varlena datum wrapped in another varlena; this case can be
+ * distinguished by noting that the wrapper isn't long enough to hold a
+ * second datum.  For simplicity we consider the wrapper to be a bytea.
+ */
 typedef bytea GBT_VARKEY;
 
-/* Better readable key */
+/*
+ * To actually work on a key, we use this more readable struct containing
+ * pointers directly to the lower and upper values.  gbt_var_key_readable()
+ * and gbt_var_key_copy() convert between these two representations.
+ */
 typedef struct
 {
 	bytea	   *lower,
@@ -19,6 +32,27 @@ typedef struct
 
 /*
  * type description
+ *
+ * For types that set the "trnc" flag, the representation of keys on internal
+ * pages may be different from that of keys on leaf pages (which match the
+ * data type's normal representation).  The methods f_gt, f_ge, f_eq, f_le,
+ * f_lt expect to work on the normal representation, and therefore can be used
+ * only with leaf-page index entries.  The f_cmp method expects to work on the
+ * representation used on internal pages, so it must not be used with leaf
+ * entries.  The f_l2n method converts the leaf-page representation to the
+ * internal-page representation; if that pointer is NULL, they're the same.
+ *
+ * Currently, btree_utils_var.c effectively assumes that internal-page keys
+ * of "trnc" types are equivalent to bytea in representation and semantics.
+ * The truncation process shortens the lower+upper bounds of a downlink node
+ * to be of length equal to their common prefix's length plus one byte.
+ * This would not work for types with comparison semantics more complex than
+ * bytewise comparison.  Even then, we need a hack to deal with the fact that
+ * shortening the upper bound would normally lead to its being considered less
+ * than the original maximum leaf-page entry.  We handle that by considering
+ * any search key that matches the bound for the bound's full length to be a
+ * potential match, even if it's longer (see gbt_var_node_pf_match and its
+ * callers).
  */
 typedef struct
 {
-- 
2.52.0

