From 90e06cbb724f6f6a244dfc69f3d59ca2e7d29c01 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilman1@gmail.com>
Date: Wed, 20 May 2020 22:49:28 -0400
Subject: [PATCH 1/4] Scan all TOCs when restoring a custom dump file without
 offsets

Requests for TOC entries' data are not guaranteed to arrive in on-disk
order. If the custom dump file was written with data offsets, pg_restore
can seek directly to the data, making request order irrelevant. Without
data offsets, pg_restore never attempted to seek backwards, even when
seeking was possible, and so could not find data located before the
current read position in the file. Commit 548e50976 changed how
pg_restore's parallel algorithm works, at the cost of greatly increasing
the number of out-of-order TOC requests.

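This patch changes pg_restore so that, when restoring a custom dump file
without data offsets, it scans through all the block headers to service
a TOC read request: if it reaches end of file without finding the
requested entry and the input is seekable, it seeks back to the start of
the data and makes one more pass. The odds of a successful parallel
restore go way up, at the cost of a bunch of extra tiny reads when
pg_restore starts up.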

The pg_restore manpage now warns against running pg_dump with an
unseekable output file and suggests that if you plan on doing a parallel
restore of a custom dump, you should run pg_dump with --file.
---
 doc/src/sgml/ref/pg_restore.sgml   |  8 ++++++++
 src/bin/pg_dump/pg_backup_custom.c | 25 ++++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 232f88024f..23286bb076 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -279,6 +279,14 @@ PostgreSQL documentation
         jobs cannot be used together with the
         option <option>--single-transaction</option>.
        </para>
+
+       <para>
+        <application>pg_restore</application> with concurrent jobs may fail
+        when restoring a custom-format archive that was written to a
+        non-seekable output stream, such as standard output. To allow
+        concurrent restoration of a custom-format archive, write it to a file
+        with <application>pg_dump</application>'s <option>--file</option> option.
+       </para>
       </listitem>
      </varlistentry>
 
diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index 369dcea429..5aa7ab33db 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -415,6 +415,7 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
 	int			blkType;
 	int			id;
+	bool		initialScan = true;
 
 	if (tctx->dataState == K_OFFSET_NO_DATA)
 		return;
@@ -423,13 +424,28 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	{
 		/*
 		 * We cannot seek directly to the desired block.  Instead, skip over
-		 * block headers until we find the one we want.  This could fail if we
-		 * are asked to restore items out-of-order.
+		 * block headers until we find the one we want.
 		 */
-		_readBlockHeader(AH, &blkType, &id);
 
-		while (blkType != EOF && id != te->dumpId)
+		for (;;)
 		{
+			_readBlockHeader(AH, &blkType, &id);
+
+			if (blkType == EOF && ctx->hasSeek && initialScan)
+			{
+				/*
+				 * This was possibly an out-of-order request.  Seek back to the
+				 * data start and rescan once to find the requested block.
+				 */
+				initialScan = false;
+				if (fseeko(AH->FH, ctx->dataStart, SEEK_SET) != 0)
+					fatal("error during file seek: %m");
+				continue;
+			}
+
+			if (blkType == EOF || id == te->dumpId)
+				break;
+
 			switch (blkType)
 			{
 				case BLK_DATA:
@@ -445,7 +461,6 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 						  blkType);
 					break;
 			}
-			_readBlockHeader(AH, &blkType, &id);
 		}
 	}
 	else
-- 
2.26.2

