From 1488368c6fb2671e267eeaf9b004c74e1eb989cd Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 11 May 2026 16:54:04 +0900 Subject: [PATCH v3] Refactor some code logic around GUC default_toast_compression This commit refactors the code around TOAST compression methods so as we have a cleaner split between the various concepts related to TOAST compression, for: - The on-disk varatt values. - The catalog attribute char values (attcompression). - The compression method names. The knowledge of each compression method is now localized in a single "registry", in toast_compression.c, with a set of routines that are able to retrieve some of the properties. The goal of this patch is to ease the addition of future methods. One idea would be split the on-disk varatt properties across multiple vartags. --- src/include/access/toast_compression.h | 27 ++---- src/include/varatt.h | 9 ++ src/backend/access/common/detoast.c | 4 +- src/backend/access/common/toast_compression.c | 97 +++++++++++++++---- src/backend/access/common/toast_internals.c | 7 +- src/backend/utils/adt/varlena.c | 2 +- contrib/amcheck/verify_heapam.c | 19 +--- src/tools/pgindent/typedefs.list | 2 +- 8 files changed, 105 insertions(+), 62 deletions(-) diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 3265f10b734f..433a33467b0e 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -22,25 +22,6 @@ */ extern PGDLLIMPORT int default_toast_compression; -/* - * Built-in compression method ID. The toast compression header will store - * this in the first 2 bits of the raw length. These built-in compression - * method IDs are directly mapped to the built-in compression methods. - * - * Don't use these values for anything other than understanding the meaning - * of the raw bits from a varlena; in particular, if the goal is to identify - * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc. - * below. We might someday support more than 4 compression methods, but - * we can never have more than 4 values in this enum, because there are - * only 2 bits available in the places where this is stored. - */ -typedef enum ToastCompressionId -{ - TOAST_PGLZ_COMPRESSION_ID = 0, - TOAST_LZ4_COMPRESSION_ID = 1, - TOAST_INVALID_COMPRESSION_ID = 2, -} ToastCompressionId; - /* * Built-in compression methods. pg_attribute will store these in the * attcompression column. In attcompression, InvalidCompressionMethod @@ -75,8 +56,14 @@ extern varlena *lz4_decompress_datum_slice(const varlena *value, int32 slicelength); /* other stuff */ -extern ToastCompressionId toast_get_compression_id(varlena *attr); +extern uint32 toast_get_compression_id(varlena *attr); extern char CompressionNameToMethod(const char *compression); extern const char *GetCompressionMethodName(char method); +/* + * Registry translation functions. "cmid" is the on-disk varatt value. + */ +extern uint32 MethodToCompressionId(char method); +extern bool CompressionIdIsValid(uint32 cmid); + #endif /* TOAST_COMPRESSION_H */ diff --git a/src/include/varatt.h b/src/include/varatt.h index 000bdf33b923..4df676d90888 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -45,6 +45,15 @@ typedef struct varatt_external #define VARLENA_EXTSIZE_BITS 30 #define VARLENA_EXTSIZE_MASK ((1U << VARLENA_EXTSIZE_BITS) - 1) +/* + * On-disk compression method IDs stored in the high bits of va_tcinfo + * and va_extinfo. Only 2 bits are available, so at most 4 values should + * be used here. + */ +#define TOAST_PGLZ_COMPRESSION_ID 0 +#define TOAST_LZ4_COMPRESSION_ID 1 +#define TOAST_INVALID_COMPRESSION_ID 2 + /* * varatt_indirect is a "TOAST pointer" representing an out-of-line * Datum that's stored in memory, not in an external toast relation. diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index a6c1f3a734b2..58be8081e915 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -470,7 +470,7 @@ toast_fetch_datum_slice(varlena *attr, int32 sliceoffset, static varlena * toast_decompress_datum(varlena *attr) { - ToastCompressionId cmid; + uint32 cmid; Assert(VARATT_IS_COMPRESSED(attr)); @@ -502,7 +502,7 @@ toast_decompress_datum(varlena *attr) static varlena * toast_decompress_datum_slice(varlena *attr, int32 slicelength) { - ToastCompressionId cmid; + uint32 cmid; Assert(VARATT_IS_COMPRESSED(attr)); diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 5a5d579494a2..4e20407e62a3 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -31,6 +31,27 @@ int default_toast_compression = DEFAULT_TOAST_COMPRESSION; errmsg("compression method %s not supported", method), \ errdetail("This functionality requires the server to be built with %s support.", method))) +/* + * Compression Method Registry for TOAST. + * + * This holds the metadata associated to each compression method supported + * by TOAST: name, values in attcompression, and on-disk compression ID + * values in varlenas. + */ +typedef struct ToastCompressionRegistryEntry +{ + const char *name; /* method name */ + char method; /* attcompression */ + uint32 cmid; /* varlena on-disk ID */ +} ToastCompressionRegistryEntry; + +static const ToastCompressionRegistryEntry toast_compression_registry[] = { + {"pglz", TOAST_PGLZ_COMPRESSION, TOAST_PGLZ_COMPRESSION_ID}, + {"lz4", TOAST_LZ4_COMPRESSION, TOAST_LZ4_COMPRESSION_ID}, +}; + +#define TOAST_NUM_COMPRESSIONS lengthof(toast_compression_registry) + /* * Compress a varlena using PGLZ. * @@ -250,10 +271,10 @@ lz4_decompress_datum_slice(const varlena *value, int32 slicelength) * * Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed. */ -ToastCompressionId +uint32 toast_get_compression_id(varlena *attr) { - ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + uint32 cmid = TOAST_INVALID_COMPRESSION_ID; /* * If it is stored externally then fetch the compression method id from @@ -278,39 +299,79 @@ toast_get_compression_id(varlena *attr) /* * CompressionNameToMethod - Get compression method from compression name * - * Search in the available built-in methods. If the compression not found - * in the built-in methods then return InvalidCompressionMethod. + * Search the compression registry by name. If the method name is not found + * then return InvalidCompressionMethod. */ char CompressionNameToMethod(const char *compression) { - if (strcmp(compression, "pglz") == 0) - return TOAST_PGLZ_COMPRESSION; - else if (strcmp(compression, "lz4") == 0) + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) { + if (strcmp(compression, toast_compression_registry[i].name) == 0) + { #ifndef USE_LZ4 - NO_COMPRESSION_SUPPORT("lz4"); + if (strcmp(compression, "lz4") == 0) + NO_COMPRESSION_SUPPORT("lz4"); #endif - return TOAST_LZ4_COMPRESSION; + return toast_compression_registry[i].method; + } } return InvalidCompressionMethod; } /* - * GetCompressionMethodName - Get compression method name + * GetCompressionMethodName + * + * Get compression method name, based on a compression method char, or + * attcompression. */ const char * GetCompressionMethodName(char method) { - switch (method) + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) { - case TOAST_PGLZ_COMPRESSION: - return "pglz"; - case TOAST_LZ4_COMPRESSION: - return "lz4"; - default: - elog(ERROR, "invalid compression method %c", method); - return NULL; /* keep compiler quiet */ + if (toast_compression_registry[i].method == method) + return toast_compression_registry[i].name; } + + elog(ERROR, "invalid compression method %c", method); + return NULL; /* keep compiler quiet */ +} + +/* + * MethodToCompressionId + * + * Translate a catalog compression method char (attcompression) to the + * corresponding on-disk varatt ID. + */ +uint32 +MethodToCompressionId(char method) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].method == method) + return toast_compression_registry[i].cmid; + } + + elog(ERROR, "invalid compression method %c", method); + return TOAST_INVALID_COMPRESSION_ID; /* keep compiler quiet */ +} + +/* + * CompressionIdIsValid + * + * Check whether a compression ID is registered. Returns true if the + * ID corresponds to a known compression method, false otherwise. + */ +bool +CompressionIdIsValid(uint32 cmid) +{ + for (int i = 0; i < TOAST_NUM_COMPRESSIONS; i++) + { + if (toast_compression_registry[i].cmid == cmid) + return true; + } + + return false; } diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 77d42e7ed65a..f30d132c1ff7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -47,7 +47,7 @@ toast_compress_datum(Datum value, char cmethod) { varlena *tmp = NULL; int32 valsize; - ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID; + uint32 cmid = TOAST_INVALID_COMPRESSION_ID; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); @@ -65,11 +65,9 @@ toast_compress_datum(Datum value, char cmethod) { case TOAST_PGLZ_COMPRESSION: tmp = pglz_compress_datum((const varlena *) DatumGetPointer(value)); - cmid = TOAST_PGLZ_COMPRESSION_ID; break; case TOAST_LZ4_COMPRESSION: tmp = lz4_compress_datum((const varlena *) DatumGetPointer(value)); - cmid = TOAST_LZ4_COMPRESSION_ID; break; default: elog(ERROR, "invalid compression method %c", cmethod); @@ -78,6 +76,9 @@ toast_compress_datum(Datum value, char cmethod) if (tmp == NULL) return PointerGetDatum(NULL); + /* translate attcompression to the on-disk compression ID */ + cmid = MethodToCompressionId(cmethod); + /* * We recheck the actual size even if compression reports success, because * it might be satisfied with having saved as little as one byte in the diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index c0ff51bd2fc1..d7144a045bc6 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -4205,7 +4205,7 @@ pg_column_compression(PG_FUNCTION_ARGS) { int typlen; char *result; - ToastCompressionId cmid; + uint32 cmid; /* On first call, get the input type's typlen, and save at *fn_extra */ if (fcinfo->flinfo->fn_extra == NULL) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 20ff58aa7825..2d665a572754 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1784,26 +1784,11 @@ check_tuple_attribute(HeapCheckContext *ctx) if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) { - ToastCompressionId cmid; - bool valid = false; + uint32 cmid; /* Compressed attributes should have a valid compression method */ cmid = TOAST_COMPRESS_METHOD(&toast_pointer); - switch (cmid) - { - /* List of all valid compression method IDs */ - case TOAST_PGLZ_COMPRESSION_ID: - case TOAST_LZ4_COMPRESSION_ID: - valid = true; - break; - - /* Recognized but invalid compression method ID */ - case TOAST_INVALID_COMPRESSION_ID: - break; - - /* Intentionally no default here */ - } - if (!valid) + if (!CompressionIdIsValid(cmid)) report_corruption(ctx, psprintf("toast value %u has invalid compression method id %d", toast_pointer.va_valueid, cmid)); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 06532bf71526..7e506c246e34 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3200,7 +3200,7 @@ TimingClockSourceType TmFromChar TmToChar ToastAttrInfo -ToastCompressionId +ToastCompressionRegistryEntry ToastTupleContext ToastedAttribute TocEntry -- 2.54.0