From ce018b1b4ceaafc2c5e2fc74c4378cdd09a93fb2 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 1 May 2026 16:31:45 +0900 Subject: [PATCH] Refactor some code logic around GUC default_toast_compression The TOAST compression code links the GUC values of default_toast_compression to the on-disk varatt attributes, limiting the number of elements in the enum GUC structure to 4, to cover for the 2 bits that can be saved in va_tcinfo or va_extinfo, depending on whether we are dealing with an inline compressible entry or an external TOAST pointer. This commit refactors the code so that we have a clean split between the various concepts related to TOAST compression, for: - The on-disk varatt values. - The GUC enum values. These are no longer limited to 4 values. - The catalog attribute char values. - The compression method names. The knowledge of each compression method is now localized in a single "registry", in toast_compression.c, with a set of routines that are able to retrieve some of the properties. The goal of this patch is to ease the addition of future methods. One idea would be to split the on-disk varatt properties across multiple vartags. Lifting the GUC enum limitation offers this benefit; perhaps it could be useful for other purposes as well. 
--- src/include/access/toast_compression.h | 39 +++-- src/include/access/toast_internals.h | 4 +- src/include/varatt.h | 13 +- src/backend/access/common/detoast.c | 14 +- src/backend/access/common/toast_compression.c | 137 +++++++++++++++--- src/backend/access/common/toast_internals.c | 13 +- src/backend/utils/adt/varlena.c | 8 +- src/backend/utils/misc/guc_tables.c | 4 +- doc/src/sgml/ref/alter_index.sgml | 5 + contrib/amcheck/verify_heapam.c | 19 +-- src/tools/pgindent/typedefs.list | 3 +- 11 files changed, 179 insertions(+), 80 deletions(-) diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 3265f10b734f..3d46be67f756 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -16,30 +16,21 @@ /* * GUC support. * - * default_toast_compression is an integer for purposes of the GUC machinery, + * default_toast_compression is an integer for purposes of the GUC machinery. * but the value is one of the char values defined below, as they appear in - * pg_attribute.attcompression, e.g. TOAST_PGLZ_COMPRESSION. + * pg_attribute.attcompression, e.g. TOAST_PGLZ_COMPRESSION. The on-disk + * compression ID values (TOAST_COMPRESS_*) are defined in varatt.h. */ extern PGDLLIMPORT int default_toast_compression; /* - * Built-in compression method ID. The toast compression header will store - * this in the first 2 bits of the raw length. These built-in compression - * method IDs are directly mapped to the built-in compression methods. - * - * Don't use these values for anything other than understanding the meaning - * of the raw bits from a varlena; in particular, if the goal is to identify - * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc. - * below. We might someday support more than 4 compression methods, but - * we can never have more than 4 values in this enum, because there are - * only 2 bits available in the places where this is stored. 
+ * Values for GUC default_toast_compression. */ -typedef enum ToastCompressionId +typedef enum ToastCompressionGucValue { - TOAST_PGLZ_COMPRESSION_ID = 0, - TOAST_LZ4_COMPRESSION_ID = 1, - TOAST_INVALID_COMPRESSION_ID = 2, -} ToastCompressionId; + TOAST_PGLZ_COMPRESSION_GUC = 0, + TOAST_LZ4_COMPRESSION_GUC = 1, +} ToastCompressionGucValue; /* * Built-in compression methods. pg_attribute will store these in the @@ -57,9 +48,9 @@ typedef enum ToastCompressionId * compiled-in, use it, otherwise use pglz. */ #ifdef USE_LZ4 -#define DEFAULT_TOAST_COMPRESSION TOAST_LZ4_COMPRESSION +#define DEFAULT_TOAST_COMPRESSION TOAST_LZ4_COMPRESSION_GUC #else -#define DEFAULT_TOAST_COMPRESSION TOAST_PGLZ_COMPRESSION +#define DEFAULT_TOAST_COMPRESSION TOAST_PGLZ_COMPRESSION_GUC #endif /* pglz compression/decompression routines */ @@ -75,8 +66,16 @@ extern varlena *lz4_decompress_datum_slice(const varlena *value, int32 slicelength); /* other stuff */ -extern ToastCompressionId toast_get_compression_id(varlena *attr); +extern uint32 toast_get_compression_id(varlena *attr); extern char CompressionNameToMethod(const char *compression); extern const char *GetCompressionMethodName(char method); +/* + * Registry translation functions. "cmid" is the on-disk varatt value. 
+ */ +extern char ToastCompressionGucToMethod(ToastCompressionGucValue guc_value); +extern uint32 MethodToCompressionId(char method); +extern char CompressionIdToMethod(uint32 cmid); +extern bool CompressionIdIsValid(uint32 cmid); + #endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index bf45889a6428..dcf6131c7b24 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -39,8 +39,8 @@ typedef struct toast_compress_header #define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ do { \ Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ - Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ + Assert((cm_method) == TOAST_COMPRESS_PGLZ || \ + (cm_method) == TOAST_COMPRESS_LZ4); \ ((toast_compress_header *) (ptr))->tcinfo = \ (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ } while (0) diff --git a/src/include/varatt.h b/src/include/varatt.h index 000bdf33b923..fb4199e6cac1 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -45,6 +45,15 @@ typedef struct varatt_external #define VARLENA_EXTSIZE_BITS 30 #define VARLENA_EXTSIZE_MASK ((1U << VARLENA_EXTSIZE_BITS) - 1) +/* + * On-disk compression method IDs stored in the high bits of va_tcinfo + * and va_extinfo. Only 2 bits are available, so at most 4 values should + * be used here. + */ +#define TOAST_COMPRESS_PGLZ 0 +#define TOAST_COMPRESS_LZ4 1 +#define TOAST_COMPRESS_INVALID 2 + /* * varatt_indirect is a "TOAST pointer" representing an out-of-line * Datum that's stored in memory, not in an external toast relation. @@ -519,8 +528,8 @@ VARATT_EXTERNAL_GET_COMPRESS_METHOD(varatt_external toast_pointer) /* This has to remain a macro; beware multiple evaluations! 
*/ #define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ do { \ - Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm) == TOAST_LZ4_COMPRESSION_ID); \ + Assert((cm) == TOAST_COMPRESS_PGLZ || \ + (cm) == TOAST_COMPRESS_LZ4); \ ((toast_pointer).va_extinfo = \ (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ } while (0) diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index a6c1f3a734b2..20111d6b275d 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -252,7 +252,7 @@ detoast_attr_slice(varlena *attr, * able to decompress the required slice. */ if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == - TOAST_PGLZ_COMPRESSION_ID) + TOAST_COMPRESS_PGLZ) max_size = pglz_maximum_compressed_size(slicelimit, max_size); /* @@ -470,7 +470,7 @@ toast_fetch_datum_slice(varlena *attr, int32 sliceoffset, static varlena * toast_decompress_datum(varlena *attr) { - ToastCompressionId cmid; + uint32 cmid; Assert(VARATT_IS_COMPRESSED(attr)); @@ -481,9 +481,9 @@ toast_decompress_datum(varlena *attr) cmid = TOAST_COMPRESS_METHOD(attr); switch (cmid) { - case TOAST_PGLZ_COMPRESSION_ID: + case TOAST_COMPRESS_PGLZ: return pglz_decompress_datum(attr); - case TOAST_LZ4_COMPRESSION_ID: + case TOAST_COMPRESS_LZ4: return lz4_decompress_datum(attr); default: elog(ERROR, "invalid compression method id %d", cmid); @@ -502,7 +502,7 @@ toast_decompress_datum(varlena *attr) static varlena * toast_decompress_datum_slice(varlena *attr, int32 slicelength) { - ToastCompressionId cmid; + uint32 cmid; Assert(VARATT_IS_COMPRESSED(attr)); @@ -524,9 +524,9 @@ toast_decompress_datum_slice(varlena *attr, int32 slicelength) cmid = TOAST_COMPRESS_METHOD(attr); switch (cmid) { - case TOAST_PGLZ_COMPRESSION_ID: + case TOAST_COMPRESS_PGLZ: return pglz_decompress_datum_slice(attr, slicelength); - case TOAST_LZ4_COMPRESSION_ID: + case TOAST_COMPRESS_LZ4: return lz4_decompress_datum_slice(attr, slicelength); 
default: elog(ERROR, "invalid compression method id %d", cmid); diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 5a5d579494a2..9881d9242624 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -31,6 +31,28 @@ int default_toast_compression = DEFAULT_TOAST_COMPRESSION; errmsg("compression method %s not supported", method), \ errdetail("This functionality requires the server to be built with %s support.", method))) +/* + * Compression Method Registry for TOAST. + * + * This holds the metadata associated with each compression method supported + * by TOAST: name, values in attcompression, on-disk compression ID values + * in varlenas, and GUC enum values. + */ +typedef struct ToastCompressionRegistryEntry +{ + const char *name; /* method name */ + char method; /* attcompression */ + uint32 cmid; /* varlena on-disk ID */ + ToastCompressionGucValue guc_value; /* GUC enum */ +} ToastCompressionRegistryEntry; + +static const ToastCompressionRegistryEntry toast_compression_registry[] = { + {"pglz", TOAST_PGLZ_COMPRESSION, TOAST_COMPRESS_PGLZ, TOAST_PGLZ_COMPRESSION_GUC}, + {"lz4", TOAST_LZ4_COMPRESSION, TOAST_COMPRESS_LZ4, TOAST_LZ4_COMPRESSION_GUC}, +}; + +#define TOAST_NUM_COMPRESSIONS lengthof(toast_compression_registry) + /* * Compress a varlena using PGLZ. * @@ -248,12 +270,12 @@ lz4_decompress_datum_slice(const varlena *value, int32 slicelength) /* * Extract compression ID from a varlena. * - * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. + * Returns TOAST_COMPRESS_INVALID if the varlena is not compressed. 
*/ -ToastCompressionId +uint32 toast_get_compression_id(varlena *attr) { - ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + uint32 cmid = TOAST_COMPRESS_INVALID; /* * If it is stored externally then fetch the compression method id from @@ -278,39 +300,116 @@ toast_get_compression_id(varlena *attr) /* * CompressionNameToMethod - Get compression method from compression name * - * Search in the available built-in methods. If the compression not found - * in the built-in methods then return InvalidCompressionMethod. + * Search the compression registry by name. If the method name is not found + * then return InvalidCompressionMethod. */ char CompressionNameToMethod(const char *compression) { - if (strcmp(compression, "pglz") == 0) - return TOAST_PGLZ_COMPRESSION; - else if (strcmp(compression, "lz4") == 0) + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) { + if (strcmp(compression, toast_compression_registry[i].name) == 0) + { #ifndef USE_LZ4 - NO_COMPRESSION_SUPPORT("lz4"); + if (strcmp(compression, "lz4") == 0) + NO_COMPRESSION_SUPPORT("lz4"); #endif - return TOAST_LZ4_COMPRESSION; + return toast_compression_registry[i].method; + } } return InvalidCompressionMethod; } /* - * GetCompressionMethodName - Get compression method name + * GetCompressionMethodName + * + * Get compression method name, based on a compression method char, or + * attcompression. 
*/ const char * GetCompressionMethodName(char method) { - switch (method) + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) { - case TOAST_PGLZ_COMPRESSION: - return "pglz"; - case TOAST_LZ4_COMPRESSION: - return "lz4"; - default: - elog(ERROR, "invalid compression method %c", method); - return NULL; /* keep compiler quiet */ + if (toast_compression_registry[i].method == method) + return toast_compression_registry[i].name; } + + elog(ERROR, "invalid compression method %c", method); + return NULL; /* keep compiler quiet */ +} + +/* + * ToastCompressionGucToMethod + * + * Translate a GUC value to a compression method char, for attcompression. + */ +char +ToastCompressionGucToMethod(ToastCompressionGucValue guc_value) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].guc_value == guc_value) + return toast_compression_registry[i].method; + } + + elog(ERROR, "invalid compression GUC value %d", guc_value); + return InvalidCompressionMethod; /* keep compiler quiet */ +} + +/* + * MethodToCompressionId + * + * Translate a catalog compression method char (attcompression) to the + * corresponding on-disk varatt ID. + */ +uint32 +MethodToCompressionId(char method) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].method == method) + return toast_compression_registry[i].cmid; + } + + elog(ERROR, "invalid compression method %c", method); + return TOAST_COMPRESS_INVALID; /* keep compiler quiet */ +} + +/* + * CompressionIdToMethod + * + * Translate an on-disk varatt ID to the corresponding catalog compression + * method char (attcompression). 
+ */ +char +CompressionIdToMethod(uint32 cmid) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].cmid == cmid) + return toast_compression_registry[i].method; + } + + elog(ERROR, "invalid compression method id %d", cmid); + return InvalidCompressionMethod; /* keep compiler quiet */ +} + +/* + * CompressionIdIsValid + * + * Check whether a compression ID is registered. Returns true if the + * ID corresponds to a known compression method, false otherwise. + */ +bool +CompressionIdIsValid(uint32 cmid) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].cmid == cmid) + return true; + } + + return false; } diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 77d42e7ed65a..abae7c497b9a 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -47,7 +47,7 @@ toast_compress_datum(Datum value, char cmethod) { varlena *tmp = NULL; int32 valsize; - ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + uint32 cmid = TOAST_COMPRESS_INVALID; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); @@ -56,20 +56,21 @@ toast_compress_datum(Datum value, char cmethod) /* If the compression method is not valid, use the current default */ if (!CompressionMethodIsValid(cmethod)) - cmethod = default_toast_compression; + cmethod = ToastCompressionGucToMethod(default_toast_compression); /* - * Call appropriate compression routine for the compression method. + * Translate the compression method char to the on-disk compression ID + * via the Method Registry, then dispatch to the appropriate compression + * routine. 
*/ + cmid = MethodToCompressionId(cmethod); switch (cmethod) { case TOAST_PGLZ_COMPRESSION: tmp = pglz_compress_datum((const varlena *) DatumGetPointer(value)); - cmid = TOAST_PGLZ_COMPRESSION_ID; break; case TOAST_LZ4_COMPRESSION: tmp = lz4_compress_datum((const varlena *) DatumGetPointer(value)); - cmid = TOAST_LZ4_COMPRESSION_ID; break; default: elog(ERROR, "invalid compression method %c", cmethod); @@ -91,7 +92,7 @@ toast_compress_datum(Datum value, char cmethod) if (VARSIZE(tmp) < valsize - 2) { /* successful compression */ - Assert(cmid != TOAST_INVALID_COMPRESSION_ID); + Assert(cmid != TOAST_COMPRESS_INVALID); TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid); return PointerGetDatum(tmp); } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index c0ff51bd2fc1..13c6ba25234e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -4205,7 +4205,7 @@ pg_column_compression(PG_FUNCTION_ARGS) { int typlen; char *result; - ToastCompressionId cmid; + uint32 cmid; /* On first call, get the input type's typlen, and save at *fn_extra */ if (fcinfo->flinfo->fn_extra == NULL) @@ -4230,16 +4230,16 @@ pg_column_compression(PG_FUNCTION_ARGS) /* get the compression method id stored in the compressed varlena */ cmid = toast_get_compression_id((varlena *) DatumGetPointer(PG_GETARG_DATUM(0))); - if (cmid == TOAST_INVALID_COMPRESSION_ID) + if (cmid == TOAST_COMPRESS_INVALID) PG_RETURN_NULL(); /* convert compression method id to compression method name */ switch (cmid) { - case TOAST_PGLZ_COMPRESSION_ID: + case TOAST_COMPRESS_PGLZ: result = "pglz"; break; - case TOAST_LZ4_COMPRESSION_ID: + case TOAST_COMPRESS_LZ4: result = "lz4"; break; default: diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 290ccbc543e2..0fadc907e79d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -469,9 +469,9 @@ static const struct config_enum_entry 
shared_memory_options[] = { }; static const struct config_enum_entry default_toast_compression_options[] = { - {"pglz", TOAST_PGLZ_COMPRESSION, false}, + {"pglz", TOAST_PGLZ_COMPRESSION_GUC, false}, #ifdef USE_LZ4 - {"lz4", TOAST_LZ4_COMPRESSION, false}, + {"lz4", TOAST_LZ4_COMPRESSION_GUC, false}, #endif {NULL, 0, false} }; diff --git a/doc/src/sgml/ref/alter_index.sgml b/doc/src/sgml/ref/alter_index.sgml index 1d42d05d8581..fb7096c16ea6 100644 --- a/doc/src/sgml/ref/alter_index.sgml +++ b/doc/src/sgml/ref/alter_index.sgml @@ -97,6 +97,11 @@ ALTER INDEX ALL IN TABLESPACE name index cannot be dropped by itself, and will automatically be dropped if its parent index is dropped. + + If the named index is already attached to the altered index, the + command will attempt to validate the parent index if the parent is + currently invalid. + diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 20ff58aa7825..2d665a572754 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1784,26 +1784,11 @@ check_tuple_attribute(HeapCheckContext *ctx) if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { - ToastCompressionId cmid; - bool valid = false; + uint32 cmid; /* Compressed attributes should have a valid compression method */ cmid = TOAST_COMPRESS_METHOD(&toast_pointer); - switch (cmid) - { - /* List of all valid compression method IDs */ - case TOAST_PGLZ_COMPRESSION_ID: - case TOAST_LZ4_COMPRESSION_ID: - valid = true; - break; - - /* Recognized but invalid compression method ID */ - case TOAST_INVALID_COMPRESSION_ID: - break; - - /* Intentionally no default here */ - } - if (!valid) + if (!CompressionIdIsValid(cmid)) report_corruption(ctx, psprintf("toast value %u has invalid compression method id %d", toast_pointer.va_valueid, cmid)); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 0abdb2d37e2d..f83aaac00f17 100644 --- a/src/tools/pgindent/typedefs.list +++ 
b/src/tools/pgindent/typedefs.list @@ -3198,7 +3198,8 @@ TimingClockSourceType TmFromChar TmToChar ToastAttrInfo -ToastCompressionId +ToastCompressionGucValue +ToastCompressionRegistryEntry ToastTupleContext ToastedAttribute TocEntry -- 2.54.0