From 969b1ba5a94449e10e56103f18ccf4f9c5481796 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 13 Oct 2025 16:09:36 -0400
Subject: [PATCH v4 1/4] Align the data block sizes of pg_dump's various
 compression modes.

After commit fe8192a95, compress_zstd.c tends to produce data block
sizes around 128K, and we don't really have any control over that
unless we want to overrule ZSTD_CStreamOutSize(), which seems like
a bad idea.  But let's try to align the other compression modes to
produce block sizes roughly comparable to that, so that pg_restore's
skip-data performance isn't enormously different for different modes.

gzip compression can be brought in line simply by setting
DEFAULT_IO_BUFFER_SIZE = 128K, which this patch does.  That
increases some unrelated buffer sizes, but none of them seem
problematic for modern platforms.

lz4's idea of appropriate block size is highly nonlinear:
if we just increase DEFAULT_IO_BUFFER_SIZE then the output
blocks end up around 200K.  I found that adjusting the slop
factor in LZ4State_compression_init was a not-too-ugly way
of bringing that number roughly into line.

With compress = none you get data blocks the same size as the
table rows, which seems potentially problematic for narrow tables.
Introduce a layer of buffering to make that case match the others.

Comments in compress_io.h and 002_pg_dump.pl suggest that if
we increase DEFAULT_IO_BUFFER_SIZE then we need to increase the
amount of data fed through the tests in order to improve coverage.
I've not done that here, leaving it for a separate patch.

Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/3515357.1760128017@sss.pgh.pa.us
---
 src/bin/pg_dump/compress_io.h    |  4 +-
 src/bin/pg_dump/compress_lz4.c   |  9 ++++-
 src/bin/pg_dump/compress_none.c  | 64 +++++++++++++++++++++++++++++++-
 src/tools/pgindent/typedefs.list |  1 +
 4 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_dump/compress_io.h b/src/bin/pg_dump/compress_io.h
index 25a7bf0904d..ae008585c89 100644
--- a/src/bin/pg_dump/compress_io.h
+++ b/src/bin/pg_dump/compress_io.h
@@ -22,9 +22,9 @@
  *
  * When changing this value, it's necessary to check the relevant test cases
  * still exercise all the branches. This applies especially if the value is
- * increased, in which case the overflow buffer may not be needed.
+ * increased, in which case some loops may not get iterated.
  */
-#define DEFAULT_IO_BUFFER_SIZE	4096
+#define DEFAULT_IO_BUFFER_SIZE	(128 * 1024)
 
 extern char *supports_compression(const pg_compress_specification compression_spec);
 
diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c
index b817a083d38..450afd4e2be 100644
--- a/src/bin/pg_dump/compress_lz4.c
+++ b/src/bin/pg_dump/compress_lz4.c
@@ -100,9 +100,14 @@ LZ4State_compression_init(LZ4State *state)
 	state->buflen = LZ4F_compressBound(DEFAULT_IO_BUFFER_SIZE, &state->prefs);
 
 	/*
-	 * Then double it, to ensure we're not forced to flush every time.
+	 * Add some slop to ensure we're not forced to flush every time.
+	 *
+	 * The present slop factor of 50% is chosen so that the typical output
+	 * block size is about 128K when DEFAULT_IO_BUFFER_SIZE = 128K.  We might
+	 * need a different slop factor to maintain that equivalence if
+	 * DEFAULT_IO_BUFFER_SIZE is changed dramatically.
 	 */
-	state->buflen *= 2;
+	state->buflen += state->buflen / 2;
 
 	/*
 	 * LZ4F_compressBegin requires a buffer that is greater or equal to
diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c
index 4abb2e95abc..94c155a572d 100644
--- a/src/bin/pg_dump/compress_none.c
+++ b/src/bin/pg_dump/compress_none.c
@@ -22,6 +22,18 @@
  *----------------------
  */
 
+/*
+ * We buffer outgoing data, just to ensure that data blocks written to the
+ * archive file are of reasonable size.  The read side could use this struct,
+ * but there's no need because it does not retain data across calls.
+ */
+typedef struct NoneCompressorState
+{
+	char	   *buffer;			/* buffer for unwritten data */
+	size_t		buflen;			/* allocated size of buffer */
+	size_t		bufdata;		/* amount of valid data currently in buffer */
+} NoneCompressorState;
+
 /*
  * Private routines
  */
@@ -49,13 +61,45 @@ static void
 WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs,
 					   const void *data, size_t dLen)
 {
-	cs->writeF(AH, data, dLen);
+	NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
+	size_t		remaining = dLen;
+
+	while (remaining > 0)
+	{
+		size_t		chunk;
+
+		/* Dump buffer if full */
+		if (nonecs->bufdata >= nonecs->buflen)
+		{
+			cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
+			nonecs->bufdata = 0;
+		}
+		/* And fill it */
+		chunk = nonecs->buflen - nonecs->bufdata;
+		if (chunk > remaining)
+			chunk = remaining;
+		memcpy(nonecs->buffer + nonecs->bufdata, data, chunk);
+		nonecs->bufdata += chunk;
+		data = ((const char *) data) + chunk;
+		remaining -= chunk;
+	}
 }
 
 static void
 EndCompressorNone(ArchiveHandle *AH, CompressorState *cs)
 {
-	/* no op */
+	NoneCompressorState *nonecs = (NoneCompressorState *) cs->private_data;
+
+	if (nonecs)
+	{
+		/* Dump buffer if nonempty */
+		if (nonecs->bufdata > 0)
+			cs->writeF(AH, nonecs->buffer, nonecs->bufdata);
+		/* Free working state */
+		pg_free(nonecs->buffer);
+		pg_free(nonecs);
+		cs->private_data = NULL;
+	}
 }
 
 /*
@@ -71,6 +115,22 @@ InitCompressorNone(CompressorState *cs,
 	cs->end = EndCompressorNone;
 
 	cs->compression_spec = compression_spec;
+
+	/*
+	 * If the caller has defined a write function, prepare the necessary
+	 * buffer.
+	 */
+	if (cs->writeF)
+	{
+		NoneCompressorState *nonecs;
+
+		nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
+		nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
+		nonecs->buffer = pg_malloc(nonecs->buflen);
+		nonecs->bufdata = 0;
+
+		cs->private_data = nonecs;
+	}
 }
 
 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 5290b91e83e..63f9387044b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1758,6 +1758,7 @@ NextValueExpr
 Node
 NodeTag
 NonEmptyRange
+NoneCompressorState
 Notification
 NotificationList
 NotifyStmt
-- 
2.43.7

