A patch for xlog.c

From: Matthew Kirkwood <matthew(at)hairy(dot)beasts(dot)org>
To: pgsql-patches(at)postgresql(dot)org
Subject: A patch for xlog.c
Date: 2001-02-24 15:49:37
Message-ID: Pine.LNX.4.10.10102241536450.12959-100000@sphinx.mythic-beasts.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers pgsql-patches

Hi,

Here is a patch against 7.1beta5 to use mmap(), and thus a
single write, to initialise xlogs. It may well improve the
performance of xlog initialisation on platforms/filesystems
which write metadata synchronously.

It still needs a configure test (to define _HAVE_MMAP), but
it certainly builds and runs OK.

It also wraps the file reopening in an "ifdef WIN32", since
it certainly isn't needed for UNIX-like platforms (which I
assume includes BeOS).

Matthew.

diff -ruN postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c postgresql-7.1beta5/src/backend/access/transam/xlog.c
--- postgresql-7.1beta5-clean/src/backend/access/transam/xlog.c Fri Feb 23 18:12:00 2001
+++ postgresql-7.1beta5/src/backend/access/transam/xlog.c Sat Feb 24 15:23:41 2001
@@ -24,6 +24,10 @@
#include <locale.h>
#endif

+#ifdef _HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/catversion.h"
@@ -36,6 +40,7 @@
#include "access/xlogutils.h"
#include "utils/builtins.h"
#include "utils/relcache.h"
+#include "utils/pfile.h"

#include "miscadmin.h"

@@ -53,6 +58,10 @@
StartUpID ThisStartUpID = 0;
XLogRecPtr RedoRecPtr;

+#ifdef _HAVE_MMAP
+void *zmmap = NULL;
+#endif
+
int XLOG_DEBUG = 0;

/* To read/update control file and create new log file */
@@ -955,7 +964,6 @@
{
char path[MAXPGPATH];
char tpath[MAXPGPATH];
- char zbuffer[BLCKSZ];
int fd;
int nbytes;

@@ -987,28 +995,36 @@
elog(STOP, "InitCreate(logfile %u seg %u) failed: %m",
logId, logSeg);

- /*
- * Zero-fill the file. We have to do this the hard way to ensure that
- * all the file space has really been allocated --- on platforms that
- * allow "holes" in files, just seeking to the end doesn't allocate
- * intermediate space. This way, we know that we have all the space
- * and (after the fsync below) that all the indirect blocks are down
- * on disk. Therefore, fdatasync(2) will be sufficient to sync future
- * writes to the log file.
- */
- MemSet(zbuffer, 0, sizeof(zbuffer));
- for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
+#ifdef _HAVE_MMAP
+ if (!zmmap || (write(fd, zmmap, XLogSegSize) != XLogSegSize))
+#endif
{
- if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
- elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
- logId, logSeg);
+ /*
+ * Zero-fill the file. We have to do this the hard way to ensure that
+ * all the file space has really been allocated --- on platforms that
+ * allow "holes" in files, just seeking to the end doesn't allocate
+ * intermediate space. This way, we know that we have all the space
+ * and (after the fsync below) that all the indirect blocks are down
+ * on disk. Therefore, fdatasync(2) will be sufficient to sync future
+ * writes to the log file.
+ */
+ char zbuffer[BLCKSZ];
+ MemSet(zbuffer, 0, sizeof(zbuffer));
+ for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer))
+ {
+ if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
+ elog(STOP, "ZeroFill(logfile %u seg %u) failed: %m",
+ logId, logSeg);
+ }
}

if (pg_fsync(fd) != 0)
elog(STOP, "fsync(logfile %u seg %u) failed: %m",
logId, logSeg);

+#ifdef WIN32
close(fd);
+#endif

/*
* Prefer link() to rename() here just to be sure that we don't overwrite
@@ -1026,10 +1042,12 @@
logId, logSeg);
#endif

+#ifdef WIN32
fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR);
if (fd < 0)
elog(STOP, "InitReopen(logfile %u seg %u) failed: %m",
logId, logSeg);
+#endif

return (fd);
}
@@ -1255,11 +1273,8 @@
if (noBlck || readOff != (RecPtr->xrecoff % XLogSegSize) / BLCKSZ)
{
readOff = (RecPtr->xrecoff % XLogSegSize) / BLCKSZ;
- if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
- elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
- readId, readSeg, readOff);
- if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
- elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
+ if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+ elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
readId, readSeg, readOff);
if (((XLogPageHeader) readBuf)->xlp_magic != XLOG_PAGE_MAGIC)
{
@@ -1415,19 +1430,13 @@
elog(LOG, "Formatting logfile %u seg %u block %u at offset %u",
readId, readSeg, readOff, EndRecPtr.xrecoff % BLCKSZ);
readFile = XLogFileOpen(readId, readSeg, false);
- if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
- elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
- readId, readSeg, readOff);
- if (read(readFile, readBuf, BLCKSZ) != BLCKSZ)
- elog(STOP, "ReadRecord: read(logfile %u seg %u off %u) failed: %m",
+ if (pg_pread(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+ elog(STOP, "ReadRecord: pg_pread(logfile %u seg %u off %u) failed: %m",
readId, readSeg, readOff);
memset(readBuf + EndRecPtr.xrecoff % BLCKSZ, 0,
BLCKSZ - EndRecPtr.xrecoff % BLCKSZ);
- if (lseek(readFile, (off_t) (readOff * BLCKSZ), SEEK_SET) < 0)
- elog(STOP, "ReadRecord: lseek(logfile %u seg %u off %u) failed: %m",
- readId, readSeg, readOff);
- if (write(readFile, readBuf, BLCKSZ) != BLCKSZ)
- elog(STOP, "ReadRecord: write(logfile %u seg %u off %u) failed: %m",
+ if (pg_pwrite(readFile, readBuf, BLCKSZ, (readOff * BLCKSZ)) != BLCKSZ)
+ elog(STOP, "ReadRecord: pg_pwrite(logfile %u seg %u off %u) failed: %m",
readId, readSeg, readOff);
readOff++;
}
@@ -1797,6 +1806,28 @@
return buf;
}

+/* Map XLogSegSize bytes of /dev/zero at zmmap (stays NULL on failure); no-op without _HAVE_MMAP. */
+#ifdef _HAVE_MMAP
+static void
+ZeroMapInit(void)
+{
+    int zfd = BasicOpenFile("/dev/zero", O_RDONLY, 0);
+
+    if (zfd < 0) {
+        elog(LOG, "Can't open /dev/zero: %m");
+        return;
+    }
+    zmmap = mmap(NULL, XLogSegSize, PROT_READ, MAP_SHARED, zfd, 0);
+    if (zmmap == MAP_FAILED) {    /* mmap() reports failure as MAP_FAILED, never NULL */
+        elog(LOG, "Can't mmap /dev/zero: %m");
+        zmmap = NULL;             /* the zero-fill code tests zmmap against NULL */
+    }
+    close(zfd);                   /* the descriptor is not used again after mapping */
+}
+#else
+#define ZeroMapInit()
+#endif
+
/*
* This func must be called ONCE on system startup
*/
@@ -1811,6 +1842,9 @@
char buffer[_INTL_MAXLOGRECSZ + SizeOfXLogRecord];

elog(LOG, "starting up");
+
+ ZeroMapInit();
+
CritSectionCount++;

XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData));

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Tom Lane 2001-02-24 16:41:06 Re: A patch for xlog.c
Previous Message Tatsuo Ishii 2001-02-24 12:41:14 pgaccess Japanese input capability patch

Browse pgsql-patches by date

  From Date Subject
Next Message Matthew Kirkwood 2001-02-24 16:15:03 Small misc tidyup patch
Previous Message Chad David 2001-02-22 18:25:41 [PATCHES] BlobInputStream.java patch