Skip site navigation (1) Skip section navigation (2)

Re: Free WAL caches on switching segments

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
Cc: ITAGAKI Takahiro <itagaki(dot)takahiro(at)lab(dot)ntt(dot)co(dot)jp>, pgsql-patches(at)postgresql(dot)org
Subject: Re: Free WAL caches on switching segments
Date: 2006-06-15 19:15:01
Message-ID: 200606151915.k5FJF1116456@candle.pha.pa.us (view raw or flat)
Thread:
Lists: pgsql-patches
Patch applied.  Thanks.

---------------------------------------------------------------------------


Bruce Momjian wrote:
> 
> I have modified your patch (attached) and will apply soon, unless there
> are more community comments.  Thanks.
> 
> ---------------------------------------------------------------------------
> 
> ITAGAKI Takahiro wrote:
> > Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us> wrote:
> > 
> > > > > > Here is a small patch to prevent undesired WAL file caching by kernel.
> > > > > > posix_fadvise(POSIX_FADV_DONTNEED) attempts to free cached pages and
> > > > > > the kernel will discard them in preference to other data caches.
> > > > > 
> > > > > On plenty of platforms, this won't even compile ...
> > > 
> > > Yes, but we probably have to have a configure test to see if
> > > posix_fadvise exists too.  I will keep this for 8.2.
> > 
> > I think we can use _POSIX_ADVISORY_INFO to test if posix_fadvise exists.
> > Also, I added the check on whether WAL archiving is enabled, because
> > archivers might use the caches to read the WAL segment.
> > 
> > 
> > By the way, should we move posix_fadvise into a separate place and rename it
> > pg_fadvise? If we use posix_fadvise for other purposes, for example,
> > read-ahead control, the separation would help keep the code clean.
> > 
> > ---
> > ITAGAKI Takahiro
> > NTT Cyber Space Laboratories
> > 
> 
> [ Attachment, skipping... ]
> 
> > 
> > ---------------------------(end of broadcast)---------------------------
> > TIP 9: In versions below 8.0, the planner will ignore your desire to
> >        choose an index scan if your joining column's datatypes do not
> >        match
> 
> -- 
>   Bruce Momjian   http://candle.pha.pa.us
>   EnterpriseDB    http://www.enterprisedb.com
> 
>   + If your life is a hard drive, Christ can be your backup. +

> Index: src/backend/access/transam/xlog.c
> ===================================================================
> RCS file: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v
> retrieving revision 1.237
> diff -c -c -r1.237 xlog.c
> *** src/backend/access/transam/xlog.c	20 Apr 2006 04:07:38 -0000	1.237
> --- src/backend/access/transam/xlog.c	14 Jun 2006 17:38:10 -0000
> ***************
> *** 478,483 ****
> --- 478,484 ----
>   					   bool use_lock);
>   static int	XLogFileOpen(uint32 log, uint32 seg);
>   static int	XLogFileRead(uint32 log, uint32 seg, int emode);
> + static void	XLogFileClose(void);
>   static bool RestoreArchivedFile(char *path, const char *xlogfname,
>   					const char *recovername, off_t expectedSize);
>   static int	PreallocXlogFiles(XLogRecPtr endptr);
> ***************
> *** 1384,1397 ****
>   			 */
>   			Assert(npages == 0);
>   			if (openLogFile >= 0)
> ! 			{
> ! 				if (close(openLogFile))
> ! 					ereport(PANIC,
> ! 							(errcode_for_file_access(),
> ! 						errmsg("could not close log file %u, segment %u: %m",
> ! 							   openLogId, openLogSeg)));
> ! 				openLogFile = -1;
> ! 			}
>   			XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
>   
>   			/* create/use new log file */
> --- 1385,1391 ----
>   			 */
>   			Assert(npages == 0);
>   			if (openLogFile >= 0)
> ! 				XLogFileClose();
>   			XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
>   
>   			/* create/use new log file */
> ***************
> *** 1567,1580 ****
>   		{
>   			if (openLogFile >= 0 &&
>   				!XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg))
> ! 			{
> ! 				if (close(openLogFile))
> ! 					ereport(PANIC,
> ! 							(errcode_for_file_access(),
> ! 						errmsg("could not close log file %u, segment %u: %m",
> ! 							   openLogId, openLogSeg)));
> ! 				openLogFile = -1;
> ! 			}
>   			if (openLogFile < 0)
>   			{
>   				XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
> --- 1561,1567 ----
>   		{
>   			if (openLogFile >= 0 &&
>   				!XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg))
> ! 				XLogFileClose();
>   			if (openLogFile < 0)
>   			{
>   				XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
> ***************
> *** 2153,2158 ****
> --- 2140,2173 ----
>   }
>   
>   /*
> +  * Close the current logfile segment for writing.
> +  */
> + static void
> + XLogFileClose(void)
> + {
> + 	Assert(openLogFile >= 0);
> + 
> + #ifdef _POSIX_ADVISORY_INFO
> + 	/*
> + 	 * WAL caches will not be accessed in the future, so we advise OS to
> + 	 * free them. But we will not do so if WAL archiving is active,
> + 	 * because archivers might use the caches to read the WAL segment.
> + 	 * While O_DIRECT works for O_SYNC, posix_fadvise() works for fsync()
> + 	 * and O_SYNC, and some platforms only have posix_fadvise().
> + 	 */
> + 	if (!XLogArchivingActive())
> + 		posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
> + #endif
> + 
> + 	if (close(openLogFile))
> + 		ereport(PANIC,
> + 			(errcode_for_file_access(),
> + 			errmsg("could not close log file %u, segment %u: %m",
> + 				   openLogId, openLogSeg)));
> + 	openLogFile = -1;
> + }
> + 
> + /*
>    * Attempt to retrieve the specified file from off-line archival storage.
>    * If successful, fill "path" with its complete path (note that this will be
>    * a temp file name that doesn't follow the normal naming convention), and
> ***************
> *** 5609,5622 ****
>   						 errmsg("could not fsync log file %u, segment %u: %m",
>   								openLogId, openLogSeg)));
>   			if (open_sync_bit != new_sync_bit)
> ! 			{
> ! 				if (close(openLogFile))
> ! 					ereport(PANIC,
> ! 							(errcode_for_file_access(),
> ! 						errmsg("could not close log file %u, segment %u: %m",
> ! 							   openLogId, openLogSeg)));
> ! 				openLogFile = -1;
> ! 			}
>   		}
>   		sync_method = new_sync_method;
>   		open_sync_bit = new_sync_bit;
> --- 5624,5630 ----
>   						 errmsg("could not fsync log file %u, segment %u: %m",
>   								openLogId, openLogSeg)));
>   			if (open_sync_bit != new_sync_bit)
> ! 				XLogFileClose();
>   		}
>   		sync_method = new_sync_method;
>   		open_sync_bit = new_sync_bit;

> 
> ---------------------------(end of broadcast)---------------------------
> TIP 3: Have you checked our extensive FAQ?
> 
>                http://www.postgresql.org/docs/faq

-- 
  Bruce Momjian   http://candle.pha.pa.us
  EnterpriseDB    http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

In response to

pgsql-patches by date

Next: From: Stefan Kaltenbrunner, Date: 2006-06-15 19:58:27
Subject: Re: Test request for Stats collector performance improvement
Previous: From: Tom Lane, Date: 2006-06-15 19:13:12
Subject: Re: TupleDesc refcounting

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group