[PATCH 4/6] pg_basebackup: don't lose a zero-length file at the end of a tablespace.

From: Joshua Elsasser <josh(at)idealist(dot)org>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Joshua Elsasser <josh(at)idealist(dot)org>
Subject: [PATCH 4/6] pg_basebackup: don't lose a zero-length file at the end of a tablespace.
Date: 2015-09-29 22:16:26
Message-ID: 1443564988-17928-5-git-send-email-josh@idealist.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

After a complete tar header was buffered it would only be processed
during the next iteration of the read loop. A zero-length file such as
a directory had no data to read, so the loop would exit without ever
having processed the file.
---
src/bin/pg_basebackup/pg_basebackup.c | 238 +++++++++++++++++++---------------
1 file changed, 134 insertions(+), 104 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index f73dd38..ccd0890 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -60,6 +60,15 @@ typedef struct TarStream {
bool keepopen;
} TarStream;

+typedef struct TarParser {
+ char tarhdr[512];
+ TarStream *stream;
+ bool in_tarhdr;
+ bool skip_file;
+ size_t tarhdrsz;
+ size_t filesz;
+} TarParser;
+
/* Global options */
static char *basedir = NULL;
static TablespaceList tablespace_dirs = {NULL, NULL};
@@ -112,6 +121,10 @@ static void progress_report(int tablespacenum, const char *filename, bool force)
static void OpenTarFile(TarStream *tarfile, const char *path);
static void CloseTarFile(TarStream *tarfile);
static void TarInsertRecoveryConf(TarStream *stream);
+static void IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen,
+ bool (*callback)(char *, void *), void *cbarg);
+static bool TarIterSkipRecoveryConf(char *h, void *arg);
+
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
static void ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum);
static void GenerateRecoveryConf(PGconn *conn);
@@ -899,6 +912,120 @@ TarInsertRecoveryConf(TarStream *stream)


/*
+ * Process the individual files inside the TAR stream and pass their headers
+ * to a callback which can modify or chose to skip them. The stream consists
+ * of a header and zero or more chunks, all 512 bytes long. The stream from
+ * the server is broken up into smaller pieces, so we have to track the size
+ * of the files to find the next header structure.
+ */
+static void
+IterateAndWriteTar(TarParser *tp, char *inbuf, int buflen,
+ bool (*callback)(char *, void *), void *cbarg)
+{
+ int rr = buflen;
+ int pos = 0;
+
+ while (rr > 0)
+ {
+ if (tp->in_tarhdr)
+ {
+ /*
+ * We're currently reading a header structure inside the
+ * TAR stream, i.e. the file metadata.
+ */
+ if (tp->tarhdrsz < 512)
+ {
+ /*
+ * Copy the header structure into tarhdr in case the
+ * header is not aligned to 512 bytes or it's not
+ * returned in whole by the last PQgetCopyData call.
+ */
+ int hdrleft;
+ int bytes2copy;
+
+ hdrleft = 512 - tp->tarhdrsz;
+ bytes2copy = (rr > hdrleft ? hdrleft : rr);
+
+ memcpy(&tp->tarhdr[tp->tarhdrsz], inbuf + pos, bytes2copy);
+
+ rr -= bytes2copy;
+ pos += bytes2copy;
+ tp->tarhdrsz += bytes2copy;
+ }
+
+ if (tp->tarhdrsz == 512)
+ {
+ /*
+ * We have the complete header structure in tarhdr, let the
+ * callback possibly modify it or chose to skip the file. Find
+ * out the size of the file padded to the next multiple of 512
+ */
+ int padding;
+
+ tp->skip_file = callback(tp->tarhdr, cbarg);
+
+ sscanf(&tp->tarhdr[124], "%11o", (unsigned int *) &tp->filesz);
+
+ padding = ((tp->filesz + 511) & ~511) - tp->filesz;
+ tp->filesz += padding;
+
+ /* Next part is the file, not the header */
+ tp->in_tarhdr = false;
+
+ /*
+ * If we're not skipping the file, write the tar
+ * header unmodified.
+ */
+ if (!tp->skip_file)
+ writeTarData(tp->stream, tp->tarhdr, 512);
+ }
+ }
+ else
+ {
+ /*
+ * We're processing a file's contents.
+ */
+ if (tp->filesz > 0)
+ {
+ /*
+ * We still have data to read (and possibly write).
+ */
+ int bytes2write;
+
+ bytes2write = (tp->filesz > rr ? rr : tp->filesz);
+
+ if (!tp->skip_file)
+ writeTarData(tp->stream, inbuf + pos, bytes2write);
+
+ rr -= bytes2write;
+ pos += bytes2write;
+ tp->filesz -= bytes2write;
+ }
+ else
+ {
+ /*
+ * No more data in the current file, the next piece of
+ * data (if any) will be a new file header structure.
+ */
+ tp->in_tarhdr = true;
+ tp->skip_file = false;
+ tp->tarhdrsz = 0;
+ tp->filesz = 0;
+ }
+ }
+ }
+}
+
+
+static bool
+TarIterSkipRecoveryConf(char *h, void *arg)
+{
+ /* Skip the file if the name is recovery.conf */
+ return (strcmp(&h[0], "recovery.conf") == 0);
+}
+
+
+/*
* Open a (possibly zlib-compressed) tar file for writing. The filebase
* argument should be the desired filename relative to basedir, without a .tar
* or .tar.gz file extension. If the user specified a basedir of - then stdout
@@ -936,12 +1063,12 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
{
TarStream stream;
char *copybuf = NULL;
- char tarhdr[512];
bool basetablespace = PQgetisnull(res, rownum, 0);
- bool in_tarhdr = true;
- bool skip_file = false;
- size_t tarhdrsz = 0;
- size_t filesz = 0;
+ TarParser parser;
+
+ MemSet(&parser, 0, sizeof(parser));
+ parser.stream = &stream;
+ parser.in_tarhdr = true;

if (basetablespace)
/* Base tablespace */
@@ -1010,106 +1137,9 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
* Look for a recovery.conf in the existing tar stream. If it's
* there, we must skip it so we can later overwrite it with our
* own version of the file.
- *
- * To do this, we have to process the individual files inside the
- * TAR stream. The stream consists of a header and zero or more
- * chunks, all 512 bytes long. The stream from the server is
- * broken up into smaller pieces, so we have to track the size of
- * the files to find the next header structure.
*/
- int rr = r;
- int pos = 0;
-
- while (rr > 0)
- {
- if (in_tarhdr)
- {
- /*
- * We're currently reading a header structure inside the
- * TAR stream, i.e. the file metadata.
- */
- if (tarhdrsz < 512)
- {
- /*
- * Copy the header structure into tarhdr in case the
- * header is not aligned to 512 bytes or it's not
- * returned in whole by the last PQgetCopyData call.
- */
- int hdrleft;
- int bytes2copy;
-
- hdrleft = 512 - tarhdrsz;
- bytes2copy = (rr > hdrleft ? hdrleft : rr);
-
- memcpy(&tarhdr[tarhdrsz], copybuf + pos, bytes2copy);
-
- rr -= bytes2copy;
- pos += bytes2copy;
- tarhdrsz += bytes2copy;
- }
- else
- {
- /*
- * We have the complete header structure in tarhdr,
- * look at the file metadata: - the subsequent file
- * contents have to be skipped if the filename is
- * recovery.conf - find out the size of the file
- * padded to the next multiple of 512
- */
- int padding;
-
- skip_file = (strcmp(&tarhdr[0], "recovery.conf") == 0);
-
- sscanf(&tarhdr[124], "%11o", (unsigned int *) &filesz);
-
- padding = ((filesz + 511) & ~511) - filesz;
- filesz += padding;
-
- /* Next part is the file, not the header */
- in_tarhdr = false;
-
- /*
- * If we're not skipping the file, write the tar
- * header unmodified.
- */
- if (!skip_file)
- writeTarData(&stream, tarhdr, 512);
- }
- }
- else
- {
- /*
- * We're processing a file's contents.
- */
- if (filesz > 0)
- {
- /*
- * We still have data to read (and possibly write).
- */
- int bytes2write;
-
- bytes2write = (filesz > rr ? rr : filesz);
-
- if (!skip_file)
- writeTarData(&stream, copybuf + pos, bytes2write);
-
- rr -= bytes2write;
- pos += bytes2write;
- filesz -= bytes2write;
- }
- else
- {
- /*
- * No more data in the current file, the next piece of
- * data (if any) will be a new file header structure.
- */
- in_tarhdr = true;
- skip_file = false;
- tarhdrsz = 0;
- filesz = 0;
- }
- }
- }
+ IterateAndWriteTar(&parser, copybuf, r,
+ TarIterSkipRecoveryConf, NULL);
}
totaldone += r;
progress_report(rownum, stream.path, false);
--
2.3.0

In response to

Browse pgsql-hackers by date

  From Date Subject
Next Message Joshua Elsasser 2015-09-29 22:16:27 [PATCH 5/6] pg_basebackup: allow GetConnection() to make non-replication connections.
Previous Message Joshua Elsasser 2015-09-29 22:16:25 [PATCH 3/6] pg_basebackup: factor out code to add a recovery.conf file to the tar file.