From 7167c0e5f78d74a0c02a76748e7b8caaabe65ccd Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 27 Sep 2019 00:51:52 +0200 Subject: [PATCH] Optimize partial TOAST decompression --- src/backend/access/common/detoast.c | 53 +++++++++++++++++++++++----- src/common/pg_lzcompress.c | 27 ++++++++++++++ src/include/access/toast_internals.h | 1 + src/include/common/pg_lzcompress.h | 1 + 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index c8b49d6a12..4727c892a8 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -196,6 +196,8 @@ heap_tuple_untoast_attr(struct varlena *attr) * * Public entry point to get back part of a toasted value * from compression or external storage. + * + * Note: when slicelength is negative, return suffix of the value. * ---------- */ struct varlena * @@ -217,8 +219,29 @@ heap_tuple_untoast_attr_slice(struct varlena *attr, if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* fetch it back (compressed marker will get set automatically) */ - preslice = toast_fetch_datum(attr); + /* + * When a prefix of the value is requested, fetch enough slices to + * decompress. Otherwise, fetch all slices. + */ + if (slicelength > 0 && sliceoffset >= 0) + { + int32 max_size; + + /* + * Determine maximum amount of compressed data needed for a prefix + * of a given length (after decompression). + */ + max_size = pglz_maximum_compressed_size(sliceoffset + slicelength, + TOAST_COMPRESS_SIZE(attr)); + + /* + * Fetch enough compressed slices, compressed marker will get set + * automatically. + */ + preslice = toast_fetch_datum_slice(attr, 0, max_size); + } + else + preslice = toast_fetch_datum(attr); } else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) { @@ -476,7 +499,9 @@ toast_fetch_datum(struct varlena *attr) * Reconstruct a segment of a Datum from the chunks saved * in the toast relation * - * Note that this function only supports non-compressed external datums. + * Note that this function supports non-compressed external datums + * and compressed external datums (in which case the requrested slice + * has to be a prefix, i.e. sliceoffset has to be 0). * ---------- */ static struct varlena * @@ -517,10 +542,11 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); /* - * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* - * we can't return a compressed datum which is meaningful to toast later + * It's meaningful to fetch slices at the start of a compressed datum, + * since we can decompress the prefix raw data without decompressing the + * whole value. */ - Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); + Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset); attrsize = toast_pointer.va_extsize; totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; @@ -531,12 +557,23 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) length = 0; } + /* + * When fetching a prefix of a compressed external datum, account for + * the rawsize tracking amount of raw data (it's stored at the beginning + * as an int32 value). + */ + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0) + length = length + sizeof(int32); + if (((sliceoffset + length) > attrsize) || length < 0) length = attrsize - sliceoffset; result = (struct varlena *) palloc(length + VARHDRSZ); - SET_VARSIZE(result, length + VARHDRSZ); + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ); + else + SET_VARSIZE(result, length + VARHDRSZ); if (length == 0) return result; /* Can save a lot of work at this point! */ @@ -720,7 +757,7 @@ toast_decompress_datum(struct varlena *attr) SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), - VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, + TOAST_COMPRESS_SIZE(attr), VARDATA(result), TOAST_COMPRESS_RAWSIZE(attr), true) < 0) elog(ERROR, "compressed data is corrupted"); diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 988b3987d0..18766ade02 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -771,3 +771,30 @@ pglz_decompress(const char *source, int32 slen, char *dest, */ return (char *) dp - dest; } + + +/* ---------- + * pglz_max_compressed_size - + * + * Calculate the maximum compressed size for a given amount of raw data. + * Return the maximum size, or total compressed size if maximum size is + * larger than total compressed size. + * ---------- + */ +int32 +pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size) +{ + int32 compressed_size; + + /* + * Use int64 to prevent overflow during calculation. + */ + compressed_size = (int32) ((int64) rawsize * 9 + 8) / 8; + + /* + * Maximum compressed size can't be larger than total compressed size. + */ + compressed_size = Min(compressed_size, total_compressed_size); + + return compressed_size; +} diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 494b07a4b1..b8703d8c94 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -31,6 +31,7 @@ typedef struct toast_compress_header */ #define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize) +#define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE(ptr) - TOAST_COMPRESS_HDRSZ) #define TOAST_COMPRESS_RAWDATA(ptr) \ (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ) #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ diff --git a/src/include/common/pg_lzcompress.h b/src/include/common/pg_lzcompress.h index 555576436c..e59fe2eaea 100644 --- a/src/include/common/pg_lzcompress.h +++ b/src/include/common/pg_lzcompress.h @@ -87,5 +87,6 @@ extern int32 pglz_compress(const char *source, int32 slen, char *dest, const PGLZ_Strategy *strategy); extern int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete); +extern int32 pglz_maximum_compressed_size(int32 rawsize, int32 total_compressed_size); #endif /* _PG_LZCOMPRESS_H_ */ -- 2.21.0