Re: Add LZ4 compression in pg_dump

From: Justin Pryzby <pryzby(at)telsasoft(dot)com>
To: Tomas Vondra <tomas(dot)vondra(at)enterprisedb(dot)com>
Cc: gkokolatos(at)pm(dot)me, shiy(dot)fnst(at)fujitsu(dot)com, Michael Paquier <michael(at)paquier(dot)xyz>, pgsql-hackers(at)lists(dot)postgresql(dot)org, Rachel Heaton <rachelmheaton(at)gmail(dot)com>
Subject: Re: Add LZ4 compression in pg_dump
Date: 2023-03-16 22:58:42
Message-ID: ZBOfIs5AIiM7EDV6@telsasoft.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

On Thu, Mar 16, 2023 at 11:30:50PM +0100, Tomas Vondra wrote:
> On 3/16/23 01:20, Justin Pryzby wrote:
> > But try reading the diff while looking for the cause of a bug. It's the
> > difference between reading 50, two-line changes, and reading a hunk that
> > replaces 100 lines with a different 100 lines, with empty/unrelated
> > lines randomly thrown in as context.
>
> I don't know, maybe I'm doing something wrong or maybe I just am bad at
> looking at diffs, but if I apply the patch you submitted on 8/3 and do
> the git-diff above (output attached), it seems pretty incomprehensible
> to me :-( I don't see 50 two-line changes (I certainly wouldn't be able
> to identify the root cause of the bug based on that).

It's true that most of the diff is still incomprehensible...

But look at the part relevant to the "empty-data" bug:

[... incomprehensible changes elided ...]
> static void
> -InitCompressorZlib(CompressorState *cs, int level)
> +DeflateCompressorInit(CompressorState *cs)
> {
> + GzipCompressorState *gzipcs;
> z_streamp zp;
>
> - zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
> + gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
> + zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
> zp->zalloc = Z_NULL;
> zp->zfree = Z_NULL;
> zp->opaque = Z_NULL;
>
> /*
> - * zlibOutSize is the buffer size we tell zlib it can output to. We
> - * actually allocate one extra byte because some routines want to append a
> - * trailing zero byte to the zlib output.
> + * outsize is the buffer size we tell zlib it can output to. We actually
> + * allocate one extra byte because some routines want to append a trailing
> + * zero byte to the zlib output.
> */
> - cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
> - cs->zlibOutSize = ZLIB_OUT_SIZE;
> + gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1);
> + gzipcs->outsize = ZLIB_OUT_SIZE;
>
> - if (deflateInit(zp, level) != Z_OK)
> - pg_fatal("could not initialize compression library: %s",
> - zp->msg);
> + /* -Z 0 uses the "None" compressor -- not zlib with no compression */
> + Assert(cs->compression_spec.level != 0);
> +
> + if (deflateInit(zp, cs->compression_spec.level) != Z_OK)
> + pg_fatal("could not initialize compression library: %s", zp->msg);
>
> /* Just be paranoid - maybe End is called after Start, with no Write */
> - zp->next_out = (void *) cs->zlibOut;
> - zp->avail_out = cs->zlibOutSize;
> + zp->next_out = gzipcs->outbuf;
> + zp->avail_out = gzipcs->outsize;
> +
> + /* Keep track of gzipcs */
> + cs->private_data = gzipcs;
> }
>
> static void
> -EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs)
> +DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs)
> {
> - z_streamp zp = cs->zp;
> + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> + z_streamp zp;
>
> + zp = gzipcs->zp;
> zp->next_in = NULL;
> zp->avail_in = 0;
>
> /* Flush any remaining data from zlib buffer */
> - DeflateCompressorZlib(AH, cs, true);
> + DeflateCompressorCommon(AH, cs, true);
>
> if (deflateEnd(zp) != Z_OK)
> pg_fatal("could not close compression stream: %s", zp->msg);
>
> - free(cs->zlibOut);
> - free(cs->zp);
> + pg_free(gzipcs->outbuf);
> + pg_free(gzipcs->zp);
> + pg_free(gzipcs);
> + cs->private_data = NULL;
> }
>
> static void
> -DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
> +DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush)
> {
> - z_streamp zp = cs->zp;
> - char *out = cs->zlibOut;
> + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> + z_streamp zp = gzipcs->zp;
> + void *out = gzipcs->outbuf;
> int res = Z_OK;
>
> - while (cs->zp->avail_in != 0 || flush)
> + while (gzipcs->zp->avail_in != 0 || flush)
> {
> res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH);
> if (res == Z_STREAM_ERROR)
> pg_fatal("could not compress data: %s", zp->msg);
> - if ((flush && (zp->avail_out < cs->zlibOutSize))
> + if ((flush && (zp->avail_out < gzipcs->outsize))
> || (zp->avail_out == 0)
> || (zp->avail_in != 0)
> )
> @@ -289,18 +122,18 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
> * chunk is the EOF marker in the custom format. This should never
> * happen but ...
> */
> - if (zp->avail_out < cs->zlibOutSize)
> + if (zp->avail_out < gzipcs->outsize)
> {
> /*
> * Any write function should do its own error checking but to
> * make sure we do a check here as well ...
> */
> - size_t len = cs->zlibOutSize - zp->avail_out;
> + size_t len = gzipcs->outsize - zp->avail_out;
>
> - cs->writeF(AH, out, len);
> + cs->writeF(AH, (char *) out, len);
> }
> - zp->next_out = (void *) out;
> - zp->avail_out = cs->zlibOutSize;
> + zp->next_out = out;
> + zp->avail_out = gzipcs->outsize;
> }
>
> if (res == Z_STREAM_END)
> @@ -309,16 +142,26 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush)
> }
>
> static void
> -WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs,
> - const char *data, size_t dLen)
> +EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs)
> {
> - cs->zp->next_in = (void *) unconstify(char *, data);
> - cs->zp->avail_in = dLen;
> - DeflateCompressorZlib(AH, cs, false);
> + /* If deflation was initialized, finalize it */
> + if (cs->private_data)
> + DeflateCompressorEnd(AH, cs);
> }
>
> static void
> -ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
> +WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs,
> + const void *data, size_t dLen)
> +{
> + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data;
> +
> + gzipcs->zp->next_in = (void *) unconstify(void *, data);
> + gzipcs->zp->avail_in = dLen;
> + DeflateCompressorCommon(AH, cs, false);
> +}
> +
> +static void
> +ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
> {
> z_streamp zp;
> char *out;
> @@ -342,7 +185,7 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
> zp->msg);
>
> /* no minimal chunk size for zlib */
> - while ((cnt = readF(AH, &buf, &buflen)))
> + while ((cnt = cs->readF(AH, &buf, &buflen)))
> {
> zp->next_in = (void *) buf;
> zp->avail_in = cnt;
> @@ -382,389 +225,196 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF)
> free(out);
> free(zp);
> }
[... more incomprehensible changes elided ...]

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Melanie Plageman 2023-03-16 23:02:43 Re: Refactor calculations to use instr_time
Previous Message Tomas Vondra 2023-03-16 22:30:50 Re: Add LZ4 compression in pg_dump