diff -rup postgresql-8.4.0-orig/src/backend/access/heap/heapam.c postgresql-8.4.0-compression/src/backend/access/heap/heapam.c
--- postgresql-8.4.0-orig/src/backend/access/heap/heapam.c	2009-06-11 16:48:53.000000000 +0200
+++ postgresql-8.4.0-compression/src/backend/access/heap/heapam.c	2009-08-08 23:13:08.000000000 +0200
@@ -135,6 +135,8 @@ initscan(HeapScanDesc scan, ScanKey key,
 	{
 		if (scan->rs_strategy == NULL)
 			scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+		
+		scan->rs_readahead_pages = 64;	/* TODO: GUC ? or maybe put it in AccessStrategy ? */
 	}
 	else
 	{
@@ -766,6 +768,12 @@ heapgettup_pagemode(HeapScanDesc scan,
 			if (page == 0)
 				page = scan->rs_nblocks;
 			page--;
+			
+			/*
+			 * do some extra readahead (really needed for compressed files)
+			 */
+			if( scan->rs_readahead_pages && !finished )
+				PrefetchBuffer( scan->rs_rd, MAIN_FORKNUM, page - scan->rs_readahead_pages + ((page >= scan->rs_readahead_pages) ? 0 : scan->rs_nblocks));
 		}
 		else
 		{
@@ -788,6 +796,13 @@ heapgettup_pagemode(HeapScanDesc scan,
 			 */
 			if (scan->rs_syncscan)
 				ss_report_location(scan->rs_rd, page);
+			
+			/*
+			 * do some extra readahead (really needed for compressed files)
+			 */
+
+			if( scan->rs_readahead_pages && !finished )
+				PrefetchBuffer( scan->rs_rd, MAIN_FORKNUM, (page + scan->rs_readahead_pages) % scan->rs_nblocks );
 		}
 
 		/*
@@ -1209,6 +1224,7 @@ heap_beginscan_internal(Relation relatio
 	scan->rs_strategy = NULL;	/* set in initscan */
 	scan->rs_allow_strat = allow_strat;
 	scan->rs_allow_sync = allow_sync;
+	scan->rs_readahead_pages = 0;
 
 	/*
 	 * we can use page-at-a-time mode if it's an MVCC-safe snapshot
diff -rup postgresql-8.4.0-orig/src/backend/storage/page/bufpage.c postgresql-8.4.0-compression/src/backend/storage/page/bufpage.c
--- postgresql-8.4.0-orig/src/backend/storage/page/bufpage.c	2009-01-01 18:23:48.000000000 +0100
+++ postgresql-8.4.0-compression/src/backend/storage/page/bufpage.c	2009-08-07 11:18:47.000000000 +0200
@@ -81,6 +81,8 @@ PageHeaderIsValid(PageHeader page)
 		page->pd_lower <= page->pd_upper &&
 		page->pd_upper <= page->pd_special &&
 		page->pd_special <= BLCKSZ &&
+		(page->pd_compressed_size == 0 										/* check that page is either not compressed */
+		|| (page->pd_compressed_size <= BLCKSZ - SizeOfPageHeaderData )) &&	/* or compressed data actually fits in page */
 		page->pd_special == MAXALIGN(page->pd_special))
 		return true;
 
Only in postgresql-8.4.0-compression/src/backend/storage/smgr: lzoconf.h
Only in postgresql-8.4.0-compression/src/backend/storage/smgr: lzodefs.h
diff -rup postgresql-8.4.0-orig/src/backend/storage/smgr/Makefile postgresql-8.4.0-compression/src/backend/storage/smgr/Makefile
--- postgresql-8.4.0-orig/src/backend/storage/smgr/Makefile	2008-02-19 11:30:08.000000000 +0100
+++ postgresql-8.4.0-compression/src/backend/storage/smgr/Makefile	2009-08-07 15:05:44.000000000 +0200
@@ -12,6 +12,6 @@ subdir = src/backend/storage/smgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = md.o smgr.o smgrtype.o
+OBJS = md.o smgr.o smgrtype.o minilzo.o
 
 include $(top_srcdir)/src/backend/common.mk
diff -rup postgresql-8.4.0-orig/src/backend/storage/smgr/md.c postgresql-8.4.0-compression/src/backend/storage/smgr/md.c
--- postgresql-8.4.0-orig/src/backend/storage/smgr/md.c	2009-06-26 22:29:04.000000000 +0200
+++ postgresql-8.4.0-compression/src/backend/storage/smgr/md.c	2009-08-08 02:06:39.000000000 +0200
@@ -17,6 +17,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/file.h>
+#include <alloca.h>
 
 #include "catalog/catalog.h"
 #include "miscadmin.h"
@@ -28,7 +29,7 @@
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
 #include "pg_trace.h"
-
+#include "minilzo.h"
 
 /* interval for calling AbsorbFsyncRequests in mdsync */
 #define FSYNCS_PER_ABSORB		10
@@ -399,6 +400,235 @@ mdunlink(RelFileNode rnode, ForkNumber f
 		register_unlink(rnode);
 }
 
+/************
+compression
+************/
+
+#define OS_PAGE			0x1000
+#define OS_PAGE_MASK	(~(OS_PAGE - 1))	/* NOT 0xF000 : a 16-bit mask would truncate sizes >= 64kB */
+
+/* when compressing, allow a bit of headroom in case compressed data is larger than original */
+#define COMPRESSION_BUFFER_SIZE (BLCKSZ+OS_PAGE)
+
+static inline off_t block_to_file_offset( BlockNumber blocknum )
+{
+	/* 
+	Byte offset of a block within its segment file. Every block keeps a fixed BLCKSZ
+	slot : a compressed block never needs MORE space, since incompressible blocks are stored raw.
+	*/
+	off_t r = (off_t)( BLCKSZ ) * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	Assert(r < (off_t) BLCKSZ * RELSEG_SIZE);
+	return r;
+}
+
+/*
+	There is no "how many bytes to read" parameter.
+	It is written in the page header (pd_compressed_size, 0 = stored raw).
+	This function should therefore only be used to decompress BLCKSZ-sized things.
+*/
+static int read_compressed_block( bool expect_compressed, File file, char *buffer )
+{
+	int 		comp_nbytes, remainder, lzo_error, nbytes, nbytes2;
+	char 		*compressed_buffer;
+	
+	lzo_uint	decomp_nbytes = BLCKSZ - SizeOfPageHeaderData;
+	
+	if( !expect_compressed )	/* we don't expect compressed pages, so go the fast path */
+	{
+		nbytes = FileRead(file, buffer, BLCKSZ);
+		
+		if( nbytes != BLCKSZ )	/* incomplete or failed read ? */
+			return nbytes;
+		
+		comp_nbytes = ((PageHeader)buffer)->pd_compressed_size;
+		if( ! comp_nbytes )	/* page is not compressed */
+			return nbytes;
+		if( comp_nbytes > BLCKSZ - SizeOfPageHeaderData )	/* corrupt on-disk length : don't trust it below */
+			return -1;
+		/* page was in fact compressed. we must decompress it. */
+		compressed_buffer = alloca( comp_nbytes );
+		memcpy( compressed_buffer, buffer + SizeOfPageHeaderData, comp_nbytes );
+		lzo_error = lzo1x_decompress_safe( 	(void*)compressed_buffer, 
+										comp_nbytes, 
+										(void*)(buffer+SizeOfPageHeaderData), 
+										&decomp_nbytes, 
+										NULL );
+	}
+	else
+	{
+		compressed_buffer = alloca( COMPRESSION_BUFFER_SIZE );
+	
+		/* start by reading the first page of compressed data. */
+		nbytes = FileRead(file, compressed_buffer, OS_PAGE);
+	
+		/*	we need at least one full OS page to get our header ; on a short or failed
+			read, return the byte count so the caller handles it (or not, in recovery mode) */
+		if( nbytes != OS_PAGE )
+			return nbytes;		/* short read : give the byte count back, caller decides */
+
+		/* get the length of compressed data stream, we stored it here when writing */
+		comp_nbytes = ((PageHeader)compressed_buffer)->pd_compressed_size;
+		
+		if( !comp_nbytes )
+		{
+			/* oops, we expected a compressed page, and got a decompressed page instead... 
+			   copy it to target buffer */
+			memcpy( buffer, compressed_buffer, OS_PAGE );
+			
+			nbytes2 = FileRead(file, buffer+OS_PAGE, BLCKSZ-OS_PAGE ); /*read the rest of page */
+			
+			if( nbytes2 < 0 )	return nbytes2;			/* signal failed read to caller */
+			else				return nbytes + nbytes2;	/* return number of bytes read (sum should be BLCKSZ, caller will check) */
+		}
+		if( comp_nbytes > BLCKSZ - SizeOfPageHeaderData )	/* corrupt length : would overrun compressed_buffer below */
+			return -1;
+		/* did we get everything in our first read ? */
+		remainder = comp_nbytes + SizeOfPageHeaderData - OS_PAGE;
+		if( remainder > 0 )
+		{
+			/* read the rest of the compressed stream now */
+			nbytes = FileRead(file, compressed_buffer+OS_PAGE, remainder );
+			
+			/* an incomplete page is as good as missing */
+			if( nbytes != remainder )
+				return 0;
+		}
+	
+		/* copy page header */
+		memcpy( buffer, compressed_buffer, SizeOfPageHeaderData );
+		
+		/* now we have our compressed data, decompress it. */
+		lzo_error = lzo1x_decompress_safe(	(void*)(compressed_buffer+SizeOfPageHeaderData), 
+										comp_nbytes, 
+										(void*)(buffer+SizeOfPageHeaderData), 
+										&decomp_nbytes, 
+										NULL );
+	}
+	/* common check for lzo return code : the _safe decompressor rejects corrupt input
+	   instead of overrunning buffers ; report it (LZO does not set errno, so no %m) */
+	if( lzo_error != LZO_E_OK )
+	{
+		ereport(WARNING,(errcode(ERRCODE_DATA_CORRUPTED),errmsg("LZO decompression error %d", lzo_error)));
+		return -1;
+	}
+	
+	if( decomp_nbytes != (BLCKSZ - SizeOfPageHeaderData) )
+		ereport(WARNING,(errcode(ERRCODE_DATA_CORRUPTED),errmsg("LZO decompression returned %d bytes instead of %d", (int)decomp_nbytes, BLCKSZ-SizeOfPageHeaderData )));
+	
+	/* decomp_nbytes has been set by lzo1x_decompress_safe */
+	return decomp_nbytes + SizeOfPageHeaderData;
+}
+
+/*
+	There is no "how many bytes to write" parameter : it is encoded in the page
+	header (pd_compressed_size) just before the compressed stream, so this function
+	should only be used to compress BLCKSZ-sized things. Returns BLCKSZ, or -1 on write failure.
+*/
+static int write_compressed_block( File file, char *buffer )
+{
+	int 			nbytes;
+	int			lzo_error;
+	lzo_uint	comp_nbytes;
+	int			write_nbytes;
+	char	compressed_buffer[ COMPRESSION_BUFFER_SIZE ];
+	unsigned char	work_buffer[ LZO1X_1_MEM_COMPRESS ];
+	
+	/* compress the block (but not the page header) */
+	comp_nbytes = COMPRESSION_BUFFER_SIZE - SizeOfPageHeaderData;
+	
+	lzo_error = lzo1x_1_compress( (void*)(	buffer + SizeOfPageHeaderData ), 
+								BLCKSZ - SizeOfPageHeaderData, 
+								(void*)(compressed_buffer + SizeOfPageHeaderData), 
+								&comp_nbytes, 
+								(void*)work_buffer );
+	
+	if( lzo_error != LZO_E_OK )
+	{
+		/* compression failed : fall back to storing the original block raw */
+		((PageHeader)buffer)->pd_compressed_size = 0;
+		return FileWrite(file, buffer, BLCKSZ);
+	}
+	
+	/*	round bytes to write up to next page boundary. the mask must be
+		~(OS_PAGE-1) : a 16-bit mask like 0xF000 would truncate sizes >= 64kB */
+	write_nbytes = (comp_nbytes + SizeOfPageHeaderData + OS_PAGE - 1) & ~(OS_PAGE - 1);
+	
+	/* check if compression did compress */
+	if( write_nbytes >= BLCKSZ )
+	{
+		/* compression makes block bigger or same size than original, store original block instead */
+		((PageHeader)buffer)->pd_compressed_size = 0;	/* mark page as uncompressed, bufmgr holds IO_IN_PROGRESS lock so should be OK */
+		return FileWrite(file, buffer, BLCKSZ);
+	}
+	
+	/* copy header to compression buffer */
+	memcpy( compressed_buffer, buffer, SizeOfPageHeaderData );
+	
+	/* store the length of compressed data stream */
+	((PageHeader)compressed_buffer)->pd_compressed_size = comp_nbytes;
+	
+	/*	TODO : a parameter to specify how much we should write to preallocate
+		blocks in the sparse file */
+	if( write_nbytes < 16384 )
+		write_nbytes = 16384;
+	
+	/* zero the padding : never write uninitialized stack bytes to disk */
+	memset( compressed_buffer + SizeOfPageHeaderData + comp_nbytes, 0, write_nbytes - SizeOfPageHeaderData - comp_nbytes );
+	
+	/* store compressed stream ; report error if the write is truncated */
+	nbytes = FileWrite(file, compressed_buffer, write_nbytes );
+	return (nbytes == write_nbytes) ? BLCKSZ : -1;
+}
+
+static inline bool rel_expect_many_compressed_blocks( SMgrRelation reln )
+{
+	/*	An extremely dirty hack here, to avoid compressing system catalogs
+		(16384 is presumably FirstNormalObjectId -- verify against access/transam.h).
+		Ideally, there should be a parameter in SMgrRelation that allows us to
+		choose between these behaviours :
+		- new blocks should be compressed or not ?
+		influences behaviour of write_block()
+	
+		- do we expect a majority of compressed blocks in the relation or not ?
+		  -	if we expect a majority of compressed blocks, read_block() should read the first 4K of data 
+			to a temp buffer, then check if it is compressed. 
+			  - If it is compressed, read the rest of the data,
+				(the actual compressed length is contained in the header), and decompress 
+				to buffer passed to read_block(), 
+			  - if it is not compressed, memcpy() to dest buffer and read() BLCKSZ-4K to get the rest
+	
+		  -	if we expect a majority of uncompressed blocks, read_block() should directly read 
+			BLCKSZ bytes of data to the buffer passed to it, then check if it is compressed.
+			  - If it is compressed, memcpy() to temp buffer and decompress back to dest buffer
+			  - If it is not compressed, we're done
+	
+		These simple tests allow everything to work even if a relation contains a mixture of compressed
+		and uncompressed blocks
+	*/	
+	return reln->smgr_rnode.relNode >= 16384;
+}
+
+static inline bool rel_must_write_compressed_blocks( SMgrRelation reln )
+{
+	return reln->smgr_rnode.relNode >= 16384;	/* skip system catalogs ; 16384 is presumably FirstNormalObjectId -- verify */
+}
+
+static inline int read_block( SMgrRelation reln, File file, char *buffer )
+{
+	return read_compressed_block( rel_expect_many_compressed_blocks( reln ), file, buffer );	/* picks the fast path for mostly-uncompressed relations */
+}
+
+static inline int write_block( SMgrRelation reln, File file, char *buffer )
+{
+	if( rel_must_write_compressed_blocks( reln ))	/* user relations get compressed, system catalogs stay raw */
+		return write_compressed_block( file, buffer );
+	else
+	{
+			((PageHeader)buffer)->pd_compressed_size = 0;	/* mark page as uncompressed, bufmgr holds IO_IN_PROGRESS lock so should be OK */
+			return FileWrite( file, buffer, BLCKSZ );
+	}
+}
+
+
 /*
  *	mdextend() -- Add a block to the specified relation.
  *
@@ -435,9 +665,7 @@ mdextend(SMgrRelation reln, ForkNumber f
 
 	v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
 
-	seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
-
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	seekpos = block_to_file_offset( blocknum );
 
 	/*
 	 * Note: because caller usually obtained blocknum by calling mdnblocks,
@@ -455,7 +683,7 @@ mdextend(SMgrRelation reln, ForkNumber f
 						blocknum,
 						relpath(reln->smgr_rnode, forknum))));
 
-	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
+	if ((nbytes = write_block(reln, v->mdfd_vfd, buffer)) != BLCKSZ)
 	{
 		if (nbytes < 0)
 			ereport(ERROR,
@@ -578,11 +806,10 @@ mdprefetch(SMgrRelation reln, ForkNumber
 
 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
 
-	seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
+	seekpos = block_to_file_offset( blocknum );
 
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	(void) FilePrefetch(v->mdfd_vfd, seekpos, 16384 );	// if block is compressed, this length should be stored somewhere so we can get it easily
 
-	(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ);
 #endif   /* USE_PREFETCH */
 }
 
@@ -605,9 +832,7 @@ mdread(SMgrRelation reln, ForkNumber for
 
 	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
 
-	seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
-
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	seekpos = block_to_file_offset( blocknum );
 
 	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
 		ereport(ERROR,
@@ -615,7 +840,7 @@ mdread(SMgrRelation reln, ForkNumber for
 				 errmsg("could not seek to block %u of relation %s: %m",
 						blocknum, relpath(reln->smgr_rnode, forknum))));
 
-	nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
+	nbytes = read_block(reln,v->mdfd_vfd, buffer);
 
 	TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
 									   reln->smgr_rnode.spcNode,
@@ -678,9 +903,7 @@ mdwrite(SMgrRelation reln, ForkNumber fo
 
 	v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
 
-	seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
-
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	seekpos = block_to_file_offset( blocknum );
 
 	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
 		ereport(ERROR,
@@ -688,7 +911,7 @@ mdwrite(SMgrRelation reln, ForkNumber fo
 				 errmsg("could not seek to block %u of relation %s: %m",
 						blocknum, relpath(reln->smgr_rnode, forknum))));
 
-	nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ);
+	nbytes = write_block(reln,v->mdfd_vfd, buffer);
 
 	TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum,
 										reln->smgr_rnode.spcNode,
@@ -1592,5 +1815,9 @@ _mdnblocks(SMgrRelation reln, ForkNumber
 			 errmsg("could not seek to end of segment %u of relation %s: %m",
 					seg->mdfd_segno, relpath(reln->smgr_rnode, forknum))));
 	/* note that this calculation will ignore any partial block at EOF */
-	return (BlockNumber) (len / BLCKSZ);
+	
+	//TODO : because of compression we don't ignore partial blocks, fix this
+	return (BlockNumber) ((len+BLCKSZ-1) / (BLCKSZ));
 }
+
+
diff -rup postgresql-8.4.0-orig/src/include/access/relscan.h postgresql-8.4.0-compression/src/include/access/relscan.h
--- postgresql-8.4.0-orig/src/include/access/relscan.h	2009-01-01 18:23:56.000000000 +0100
+++ postgresql-8.4.0-compression/src/include/access/relscan.h	2009-08-08 01:37:19.000000000 +0200
@@ -35,6 +35,7 @@ typedef struct HeapScanDescData
 	BlockNumber rs_startblock;	/* block # to start at */
 	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
 	bool		rs_syncscan;	/* report location to syncscan logic? */
+	int			rs_readahead_pages;	/* if non-zero, issue a Prefetch to get a page rs_readahead_pages ahead of current page */
 
 	/* scan current state */
 	bool		rs_inited;		/* false = scan not init'd yet */
diff -rup postgresql-8.4.0-orig/src/include/pg_config.h postgresql-8.4.0-compression/src/include/pg_config.h
--- postgresql-8.4.0-orig/src/include/pg_config.h	2009-08-07 14:33:32.000000000 +0200
+++ postgresql-8.4.0-compression/src/include/pg_config.h	2009-08-05 19:54:16.000000000 +0200
@@ -35,7 +35,7 @@
    currently 2^15 (32768). This is determined by the 15-bit widths of the
    lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h).
    Changing BLCKSZ requires an initdb. */
-#define BLCKSZ 8192
+#define BLCKSZ 32768
 
 /* Define to the default TCP port number on which the server listens and to
    which clients will try to connect. This can be overridden at run-time, but
@@ -680,7 +680,7 @@
    in the direction of a small limit. A power-of-2 value is recommended to
    save a few cycles in md.c, but is not absolutely required. Changing
    RELSEG_SIZE requires an initdb. */
-#define RELSEG_SIZE 131072
+#define RELSEG_SIZE 32768
 
 /* The size of `off_t', as computed by sizeof. */
 #define SIZEOF_OFF_T 8
diff -rup postgresql-8.4.0-orig/src/include/storage/bufpage.h postgresql-8.4.0-compression/src/include/storage/bufpage.h
--- postgresql-8.4.0-orig/src/include/storage/bufpage.h	2009-01-01 18:24:01.000000000 +0100
+++ postgresql-8.4.0-compression/src/include/storage/bufpage.h	2009-08-07 11:15:29.000000000 +0200
@@ -132,6 +132,9 @@ typedef struct PageHeaderData
 	LocationIndex pd_special;	/* offset to start of special space */
 	uint16		pd_pagesize_version;
 	TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
+	uint16		pd_compressed_size;	/*	the page header is always stored uncompressed. 
+										pd_compressed_size contains either 0 (page is not compressed)
+										or the number of bytes of compressed data that starts at pd_linp */
 	ItemIdData	pd_linp[1];		/* beginning of line pointer array */
 } PageHeaderData;
 
