diff -Ncr postgresql-8.2.1.org/contrib/Makefile postgresql-8.2.1/contrib/Makefile
*** postgresql-8.2.1.org/contrib/Makefile 2006-09-09 13:07:51.000000000 +0900
--- postgresql-8.2.1/contrib/Makefile 2007-04-06 17:14:21.000000000 +0900
***************
*** 16,21 ****
--- 16,22 ----
  		intagg \
  		intarray \
  		isn \
+ 		lesslog \
  		lo \
  		ltree \
  		oid2name \
diff -Ncr postgresql-8.2.1.org/contrib/README postgresql-8.2.1/contrib/README
*** postgresql-8.2.1.org/contrib/README 2006-09-09 13:07:52.000000000 +0900
--- postgresql-8.2.1/contrib/README 2007-04-06 17:14:21.000000000 +0900
***************
*** 68,73 ****
--- 68,77 ----
  	PostgreSQL type extensions for ISBN, ISSN, ISMN, EAN13 product numbers
  	by Germán Méndez Bravo (Kronuz)
  
+ lesslog -
+ 	Reduce archive log file size by removing unnecessary physical log.
+ 	by Koichi Suzuki
+ 
  lo -
  	Large Object maintenance
  	by Peter Mount
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/Makefile postgresql-8.2.1/contrib/lesslog/Makefile
*** postgresql-8.2.1.org/contrib/lesslog/Makefile 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/Makefile 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,3 ----
+ all install clean:
+ 	$(MAKE) -f Makefile.pg_compresslog $@
+ 	$(MAKE) -f Makefile.pg_decompresslog $@
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/Makefile.pg_compresslog postgresql-8.2.1/contrib/lesslog/Makefile.pg_compresslog
*** postgresql-8.2.1.org/contrib/lesslog/Makefile.pg_compresslog 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/Makefile.pg_compresslog 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,19 ----
+ PROGRAM = pg_compresslog
+ OBJS = pg_compresslog.o file.o debug.o
+ 
+ PG_CPPFLAGS = -I$(libpq_srcdir)
+ PG_LIBS = $(libpq_pgport) $(top_builddir)/src/backend/utils/hash/pg_crc.o
+ 
+ DOCS = README.lesslog
+ 
+ ifdef USE_PGXS
+ PGXS := $(shell pg_config --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/lesslog
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
+ 
+ $(OBJS): Makefile.pg_compresslog
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/Makefile.pg_decompresslog postgresql-8.2.1/contrib/lesslog/Makefile.pg_decompresslog
*** postgresql-8.2.1.org/contrib/lesslog/Makefile.pg_decompresslog 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/Makefile.pg_decompresslog 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,19 ----
+ PROGRAM = pg_decompresslog
+ OBJS = pg_decompresslog.o file.o debug.o
+ 
+ PG_CPPFLAGS = -I$(libpq_srcdir)
+ PG_LIBS = $(libpq_pgport) $(top_builddir)/src/backend/utils/hash/pg_crc.o
+ 
+ DOCS =
+ 
+ ifdef USE_PGXS
+ PGXS := $(shell pg_config --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/lesslog
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
+ 
+ $(OBJS): Makefile.pg_decompresslog
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/README.lesslog postgresql-8.2.1/contrib/lesslog/README.lesslog
*** postgresql-8.2.1.org/contrib/lesslog/README.lesslog 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/README.lesslog 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,71 ----
+ lesslog README 2006/04/06
+ 
+ ** What is lesslog?
+ 
+ lesslog is a set of tools to reduce the size of PostgreSQL archive log. lesslog consists of the following materials.
+ 
+ - pg_compresslog
+   This is a command to remove physical log records with "removable" mark.
+   This command should be specified as archive_command in postgresql.conf.
+   This command also removes page headers by changing page size from 8kB to
+   16MB, which are restored by pg_decompresslog.
+ 
+ - pg_decompresslog
+   This command restores page headers and adds dummy data to make up for
+   the removed physical log records, so that the LSN of each log record and
+   the page size are restored for use in archive recovery. This command
+   should be specified as restore_command in recovery.conf.
+ 
+ 
+ ** How to use lesslog
+ 1. Build and install the additional tools.
+    Move to contrib/lesslog directory, then make and make install.
+    pg_compresslog and pg_decompresslog will be installed to PostgreSQL install
+    directory.
+ 2. Edit postgresql.conf
+    Edit postgresql.conf which is copied to DB cluster by initdb and edit
+    parameters as follows (replace /path/to/archive with your archive directory).
+ 
+    full_page_writes = on
+    wal_add_optimization_info = on
+    archive_command = 'pg_compresslog "%p" /path/to/archive/"%f"'
+ 
+ ** How to use pg_compresslog
+ Synopsis
+    pg_compresslog [from [to]]
+ 
+ Explanation
+    pg_compresslog removes physical log from the WAL segment file specified by
+    the "from" argument and archives the result under the "to" file name.
+ 
+    If "from" is omitted or specified as "-", it reads the segment file from stdin.
+    If "to" is omitted or specified as "-", it writes to stdout.
+ 
+    Physical log records removed by pg_compresslog are those written while
+    online backup is not running and both full_page_writes and
+    wal_add_optimization_info are "on".
+ 
+    To use the output of pg_compresslog command in archive recovery, it must be
+    restored using pg_decompresslog command.
+ 
+ Return value
+    pg_compresslog returns zero if no error occurs, 1 if error occurs.
+ 
+ ** How to use pg_decompresslog
+ Synopsis
+    pg_decompresslog [from [to]]
+ 
+ Explanation
+    pg_decompresslog reads the archive log file specified by the "from" argument,
+    restores the area corresponding to the removed physical log, which restores
+    the LSN of each log record, and writes the result to the file specified by
+    the "to" argument.
+ 
+    If "from" is omitted or specified as "-", it reads from stdin. If "to" is
+    omitted or specified as "-", it writes to stdout.
+ 
+    You can specify the file written by pg_compresslog as the "from" argument.
+ 
+ Return value
+    It returns zero if no error occurs, 1 if error occurs.
+ 
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/debug.c postgresql-8.2.1/contrib/lesslog/debug.c
*** postgresql-8.2.1.org/contrib/lesslog/debug.c 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/debug.c 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,125 ----
+ /*
+  * debug.c
+  *   Debug dump function implementation.
+  */
+ #include <stdio.h>
+ #include <string.h>
+ 
+ #include "postgres.h"
+ #include "access/xlog.h"
+ #include "access/xlog_internal.h"
+ 
+ void get_segment_id(const char *filename);
+ void dump_page_header(int num, XLogPageHeader pheader);
+ void dumpXLogRecord(XLogRecPtr *ptr, size_t off, XLogRecord *record);
+ 
+ /* Current segment ID */
+ static uint32 segment_id;
+ 
+ /*
+  * Display names of the resource managers, indexed by resource manager ID.
+  * ID 13 belongs to GIN in PostgreSQL 8.2 (R-tree was removed in that release).
+  */
+ static const char * const RM_names[RM_MAX_ID + 1] = {
+     "XLOG ",    /* 0 */
+     "XACT ",    /* 1 */
+     "SMGR ",    /* 2 */
+     "CLOG ",    /* 3 */
+     "DBASE",    /* 4 */
+     "TBSPC",    /* 5 */
+     "MXACT",    /* 6 */
+     "RM  7",    /* 7 */
+     "RM  8",    /* 8 */
+     "RM  9",    /* 9 */
+     "HEAP ",    /* 10 */
+     "BTREE",    /* 11 */
+     "HASH ",    /* 12 */
+     "GIN  ",    /* 13 */
+     "GIST ",    /* 14 */
+     "SEQ  "     /* 15 */
+ };
+ 
+ /*
+  * Obtain segment ID from WAL segment file name.
+  *
+  * Parameters:
+  *   filename: WAL segment file name, with or without a directory part.
+  *
+  * Note: If the last path component does not follow the WAL segment file name
+  * format (three 8-digit hexadecimal fields), the current segment ID is left
+  * unchanged.
+  */
+ void
+ get_segment_id(const char *filename)
+ {
+     TimeLineID  tli;
+     uint32      xlogid;
+     uint32      seg;
+     const char *base = strrchr(filename, '/');
+ 
+     /* Skip the directory part when there is one. */
+     base = base ? base + 1 : filename;
+ 
+     /* Only commit the result when all three fields were parsed. */
+     if (sscanf(base, "%08X%08X%08X", &tli, &xlogid, &seg) == 3)
+         segment_id = seg;
+ }
+ 
+ /*
+  * Dump the page header content.
+  *
+  * Parameters:
+  *   num: Page number to be included in the dump output.
+  *   page: Target page.
+  */
+ void
+ dump_page_header(int num, XLogPageHeader page)
+ {
+     const bool  has_contrecord = (page->xlp_info & XLP_FIRST_IS_CONTRECORD) != 0;
+ 
+     printf("=[%04d]==================================================\n", num);
+     printf("PAGE: xlp_magic=%02X\n", page->xlp_magic);
+     printf("PAGE: xlp_info=%02X\n", page->xlp_info);
+     printf("PAGE: xlp_tli=%u\n", page->xlp_tli);
+     printf("PAGE: xlogid=%u\n", page->xlp_pageaddr.xlogid);
+     printf("PAGE: xrecoff=%u\n", page->xlp_pageaddr.xrecoff);
+ 
+     /* The continuation record, when present, sits right after the page header. */
+     if (has_contrecord)
+     {
+         char           *after_header = (char *)page + XLogPageHeaderSize(page);
+         XLogContRecord *cont = (XLogContRecord *)after_header;
+ 
+         printf("PAGE: rem_len=%u\n", cont->xl_rem_len);
+     }
+     printf("=========================================================\n");
+ }
+ 
+ /*
+  * Print one record header in xlogdump format and flush stdout.
+  *
+  * Parameters:
+  *   ptr: Record position information (only the log ID is used).
+  *   off: Record offset within the segment.
+  *   record: Pointer to the record.
+  *
+  * The position of the record printed by the previous call is remembered in a
+  * static variable; "(?)" is printed when it differs from this record's
+  * xl_prev link.
+  *
+  * Note: the source is copied and modified using xlogdump source.
+  */
+ void
+ dumpXLogRecord(XLogRecPtr *ptr, size_t off, XLogRecord *record)
+ {
+     static XLogRecPtr last_seen = { 0, 0};
+     XLogRecPtr  here;
+ 
+     /* Position of this record: log ID from the caller, offset from the segment. */
+     here.xlogid = ptr->xlogid;
+     here.xrecoff = (uint32)off + segment_id * XLOG_SEG_SIZE;
+ 
+     printf("%u/%08X: prv %u/%08X",
+            here.xlogid, here.xrecoff,
+            record->xl_prev.xlogid, record->xl_prev.xrecoff);
+ 
+     if (!XLByteEQ(record->xl_prev, last_seen))
+         printf("(?)");
+     last_seen = here;
+ 
+     printf("; xid %u; ", record->xl_xid);
+ 
+     if (record->xl_rmid > RM_MAX_ID)
+         printf("RM %2d", record->xl_rmid);
+     else
+         printf("%s", RM_names[record->xl_rmid]);
+ 
+     printf(" info %02X len %u tot_len %u\n", record->xl_info,
+            record->xl_len, record->xl_tot_len);
+ 
+     fflush(stdout);
+ }
+ 
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/debug.h postgresql-8.2.1/contrib/lesslog/debug.h
*** postgresql-8.2.1.org/contrib/lesslog/debug.h 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/debug.h 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,28
----
+ /*
+  * debug.h
+  *   Interface for debug dump function.
+  */
+ #ifndef DEBUG_H_INCLUDED
+ #define DEBUG_H_INCLUDED
+ 
+ #include "access/xlog.h"
+ #include "access/xlog_internal.h"
+ 
+ /*
+  * In the release build the debug calls expand to a no-op expression, so
+  * they stay valid wherever a statement is expected.
+  */
+ #ifdef DEBUG
+ 
+ void get_segment_id(const char *filename);
+ void dump_page_header(int num, XLogPageHeader pheader);
+ void dumpXLogRecord(XLogRecPtr *ptr, size_t off, XLogRecord *record);
+ 
+ #else
+ 
+ #define get_segment_id(a)           ((void) 0)
+ #define dump_page_header(a, b)      ((void) 0)
+ #define dumpXLogRecord(a, b, c)     ((void) 0)
+ 
+ #endif
+ 
+ #endif
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/file.c postgresql-8.2.1/contrib/lesslog/file.c
*** postgresql-8.2.1.org/contrib/lesslog/file.c 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/file.c 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,180 ----
+ /*
+  * file.c
+  *   Common I/O routine implementation used in archive/restoration
+  */
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <errno.h>
+ #include <unistd.h>
+ #include <fcntl.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ 
+ #include "postgres.h"
+ #include "access/xlog_internal.h"
+ 
+ #include "file.h"
+ 
+ /*
+  * Read up to len bytes from the file and return the size actually read.
+  * Reading stops early only at end of file; reads interrupted by a signal
+  * (EINTR) are retried.
+  *
+  * Parameters:
+  *   fd: File descriptor.
+  *   buff: Buffer to read into (at least len bytes).
+  *   len: Size to read.
+  *
+  * Note: If an error occurs, a message is printed and exit(1) is called here;
+  * control does not return to the caller in this case.
+  */
+ int
+ read_buff(int fd, char *buff, size_t len)
+ {
+     size_t      read_len = 0;
+ 
+     do
+     {
+         /* read() returns ssize_t; keep the full width until it is checked. */
+         ssize_t     ret = read(fd, buff + read_len, len - read_len);
+ 
+         if (ret < 0)
+         {
+             if (errno == EINTR)
+                 continue;
+             fprintf(stderr, "failed to read : %s\n", strerror(errno));
+             exit(1);
+         }
+         else if (ret == 0)
+             break;              /* end of file */
+         read_len += (size_t) ret;
+     } while (read_len < len);
+ 
+     return (int) read_len;
+ }
+ 
+ /*
+  * Write the whole buffer to the file, continuing after short writes.
+  * The length to write is given by an argument.
+  *
+  * Parameter:
+  *   fd: File descriptor.
+  *   buff: Buffer to write.
+  *   len: Size to write.
+  *
+  * Note: If an error occurs, a message is printed and exit(1) is called in
+  * this function; control does not return to the caller in this case.
+  */
+ void
+ write_buff(int fd, const char *buff, size_t len)
+ {
+     size_t      written_len = 0;
+ 
+     while (written_len < len)
+     {
+         ssize_t     ret = write(fd, buff + written_len, len - written_len);
+ 
+         if (ret < 0)
+         {
+             if (errno == EINTR)
+                 continue;
+             fprintf(stderr, "failed to write : %s\n", strerror(errno));
+             exit(1);
+         }
+         if (ret == 0)
+         {
+             /*
+              * No progress and no errno: report it as "no space left" rather
+              * than spinning forever.
+              */
+             fprintf(stderr, "failed to write : %s\n", strerror(ENOSPC));
+             exit(1);
+         }
+         written_len += (size_t) ret;
+     }
+ }
+ 
+ /*
+  * Copy the contents of the file until end of file is reached.
+  *
+  * Parameter:
+  *   from_fd: File descriptor of the file to copy from.
+  *   to_fd: File descriptor of the file to copy to.
+  *
+  * Note: If an error occurs in this function, exit(1) is called here and
+  * control does not return to the caller in this case.
+  */
+ void
+ copy_file(int from_fd, int to_fd)
+ {
+     char        buff[8 * 1024];     /* 8KB buffer */
+ 
+     while (1)
+     {
+         /* Read to the buffer. */
+         ssize_t     read_len = read(from_fd, buff, sizeof(buff));
+ 
+         if (read_len < 0)
+         {
+             if (errno == EINTR)
+                 continue;
+             fprintf(stderr, "failed to read : %s\n", strerror(errno));
+             exit(1);
+         }
+         else if (read_len == 0)
+             break;
+         /* Write all the buffer content. */
+         write_buff(to_fd, buff, (size_t) read_len);
+     }
+ }
+ 
+ /*
+  * Validate the record by comparing CRC value.
+  *
+  * CRC value will be calculated in the following order.
+  *   - Logical Log
+  *   - Full page write (if exists)
+  *   - Record header (excluding the CRC area)
+  *
+  * The backup block headers are copied out with memcpy() before use because
+  * they follow the logical log without alignment padding.  Every backup
+  * block must also fit inside xl_tot_len, so a corrupted length cannot make
+  * the CRC run past the record.  xl_tot_len is allowed to be larger than the
+  * data actually present (records whose physical log has been removed keep
+  * their original xl_tot_len with the backup block flags cleared).
+  *
+  * Parameters:
+  *   precord: Pointer to the target record.
+  *
+  * Returns true if the record is valid; otherwise prints a message to stderr
+  * and returns false.
+  */
+ bool
+ is_valid_record(XLogRecord *precord)
+ {
+     pg_crc32    crc;
+     char       *blk;
+     uint32      remaining;
+     int         i;
+ 
+     /* The header and the logical log must fit inside the record. */
+     if (precord->xl_tot_len < SizeOfXLogRecord + precord->xl_len)
+     {
+         fprintf(stderr, "invalid record length.\n");
+         return false;
+     }
+     remaining = precord->xl_tot_len - (SizeOfXLogRecord + precord->xl_len);
+ 
+     /* Calculate CRC for a logical log. */
+     INIT_CRC32(crc);
+     COMP_CRC32(crc, XLogRecGetData(precord), precord->xl_len);
+ 
+     /*
+      * If full page writes exist, calculate CRC for each full page write.
+      */
+     blk = (char *)XLogRecGetData(precord) + precord->xl_len;
+     for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
+     {
+         BkpBlock    bkpb;
+         uint32      blen;
+ 
+         if (!(precord->xl_info & XLR_SET_BKP_BLOCK(i)))
+             continue;
+ 
+         if (remaining < sizeof(BkpBlock))
+         {
+             fprintf(stderr, "incorrect total length in record.\n");
+             return false;
+         }
+         memcpy(&bkpb, blk, sizeof(BkpBlock));
+ 
+         if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
+         {
+             fprintf(stderr, "incorrect hole size in record.\n");
+             return false;
+         }
+ 
+         blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
+         if (blen > remaining)
+         {
+             fprintf(stderr, "incorrect total length in record.\n");
+             return false;
+         }
+         COMP_CRC32(crc, blk, blen);
+         blk += blen;
+         remaining -= blen;
+     }
+ 
+     /* Calculate record header CRC value. */
+     COMP_CRC32(crc, (char *)precord + sizeof(pg_crc32),
+                SizeOfXLogRecord - sizeof(pg_crc32));
+ 
+     /* Examine if the final CRC is the same as the value found in the record. */
+     FIN_CRC32(crc);
+     if (!EQ_CRC32(precord->xl_crc, crc))
+     {
+         fprintf(stderr, "incorrect resource manager data checksum.\n");
+         return false;
+     }
+ 
+     return true;
+ }
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/file.h postgresql-8.2.1/contrib/lesslog/file.h
*** postgresql-8.2.1.org/contrib/lesslog/file.h 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/file.h 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,28 ----
+ /*
+  * file.h
+  *   Common file I/O routines for pg_compresslog and pg_decompresslog.
+  */
+ #ifndef FILE_H_INCLUDED
+ #define FILE_H_INCLUDED
+ 
+ #include "postgres.h"
+ #include "access/xlog.h"
+ 
+ int read_buff(int fd, char *buff, size_t len);
+ void write_buff(int fd, const char *buff, size_t len);
+ void copy_file(int from_fd, int to_fd);
+ bool is_valid_record(XLogRecord *precord);
+ 
+ /*
+  * Check if the page header in the buffer is valid.
+  */
+ #define IS_WAL_FILE(buff) \
+     (((XLogPageHeader)(buff))->xlp_magic == XLOG_PAGE_MAGIC)
+ 
+ /*
+  * Check if the record is log switch WAL record.
+  */
+ #define IS_XLOG_SWITCH(rec) \
+     ((rec)->xl_rmid == RM_XLOG_ID && (rec)->xl_info == XLOG_SWITCH)
+ 
+ #endif /* !FILE_H_INCLUDED */
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/pg_compresslog.c postgresql-8.2.1/contrib/lesslog/pg_compresslog.c
*** postgresql-8.2.1.org/contrib/lesslog/pg_compresslog.c 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/pg_compresslog.c 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,450 ----
+ /*
+  * pg_compresslog.c
+  *   Implementation of the archive command (pg_compresslog).
+  */
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include <sys/stat.h>
+ #include <unistd.h>
+ 
+ #include "postgres.h"
+ #include "access/xlog.h"
+ #include "access/xlog_internal.h"
+ #include "catalog/pg_control.h"
+ 
+ #include "file.h"
+ #include "debug.h"
+ 
+ /* =============================================================================
+  * Global variables
+  * ===========================================================================*/
+ 
+ /* Buffer to read WAL segment file. */
+ static char xlog_buff[XLogSegSize];
+ /* Buffer to hold archive log file image with physical log removed. */
+ static char arch_buff[XLogSegSize];
+ 
+ static int cont_log_size;       /* Log size considering the former segment. */
+ static int logical_log_size;    /* Total size of the logical log. */
+ static int physical_log_size;   /* Total size of the physical log.
*/
+ 
+ /* =============================================================================
+  * Prototype declaration
+  * ===========================================================================*/
+ static void print_usage(int code);
+ static void close_or_die(int fd, const char *name);
+ static int open_xlog_file(int argc, char *argv[]);
+ static int open_arch_file(int argc, char *argv[]);
+ int create_arch_image(const char *from, char *to);
+ static bool remove_bkp_block(XLogRecord *record);
+ 
+ /* =============================================================================
+  * Macros
+  * ===========================================================================*/
+ /* Check if the physical log can be removed. */
+ #define IS_REMOVABLE(record) \
+     (((record)->xl_info & XLR_BKP_BLOCK_MASK) && \
+      ((record)->xl_info & XLR_BKP_REMOVABLE))
+ 
+ /* =============================================================================
+  * Function definitions
+  * ===========================================================================*/
+ /*
+  * Entry point of pg_compresslog command.
+  *
+  * Exits with 0 on success and 1 on any error.  Input that is not a complete
+  * WAL segment file is copied to the output unchanged.  The output descriptor
+  * is closed explicitly on every path, so an error reported at close time
+  * makes the archive command fail.
+  */
+ int
+ main(int argc, char *argv[])
+ {
+     /* Names used in messages; the arguments may be absent (stdin/stdout). */
+     const char *from_name = (argc > 1) ? argv[1] : "stdin";
+     const char *to_name = (argc > 2) ? argv[2] : "stdout";
+     int         from_fd = -1;
+     int         to_fd = -1;
+     size_t      xlog_len;
+     size_t      arch_len;
+ 
+     /* Error if there are more argument(s) other than from and to. */
+     if (argc > 3)
+         print_usage(1);
+ 
+     /* Open WAL segment file to archive. */
+     from_fd = open_xlog_file(argc, argv);
+ 
+     /*
+      * If input file is not stdin, check the size of the file.
+      * If the size is not 16MB, then the specified file is not the WAL
+      * segment file. We're not sure if we can scan the file to find
+      * removable physical log, so copy the whole file and then exit.
+      */
+     if (from_fd != fileno(stdin))
+     {
+         struct stat st;
+ 
+         if (fstat(from_fd, &st) < 0)
+         {
+             fprintf(stderr, "failed to stat `%s': %s\n", from_name,
+                     strerror(errno));
+             exit(1);
+         }
+         if (st.st_size != XLogSegSize)
+         {
+             to_fd = open_arch_file(argc, argv);
+             copy_file(from_fd, to_fd);
+             close_or_die(from_fd, from_name);
+             close_or_die(to_fd, to_name);
+             exit(0);
+         }
+     }
+ 
+     /*
+      * Read all the data from WAL segment file to archive.
+      * If the amount of the data is not sufficient (less than 16MB: XLogSegSize)
+      * or header is not valid, specified input file is not a WAL segment file.
+      * Copy the whole input to the output and then exit.
+      */
+     xlog_len = read_buff(from_fd, xlog_buff, XLogSegSize);
+     if (xlog_len != XLogSegSize || !IS_WAL_FILE(xlog_buff))
+     {
+         /* Write the checked header part and then copy the rest of the input file. */
+         to_fd = open_arch_file(argc, argv);
+         write_buff(to_fd, xlog_buff, xlog_len);
+         copy_file(from_fd, to_fd);
+         close_or_die(from_fd, from_name);
+         close_or_die(to_fd, to_name);
+         exit(0);
+     }
+     close_or_die(from_fd, from_name);
+ 
+     /*
+      * Build the entire compressed output file image on the buffer,
+      * removing physical logs, then write the whole compressed file image.
+      */
+     arch_len = create_arch_image(xlog_buff, arch_buff);
+     to_fd = open_arch_file(argc, argv);
+     write_buff(to_fd, arch_buff, arch_len);
+     close_or_die(to_fd, to_name);
+ 
+     exit(0);
+ }
+ 
+ /*
+  * Show the usage of the command and then exit with the specified code.
+  * The text goes to stderr when the code reports a failure, stdout otherwise.
+  */
+ static void
+ print_usage(int code)
+ {
+     fprintf((code == 0) ? stdout : stderr,
+         "usage: pg_compresslog [from [to]]\n"
+         "  from - Input file name (stdin if omitted or '-' is given)\n"
+         "  to - Output file name (stdout if omitted or '-' is given)\n"
+     );
+     exit(code);
+ }
+ 
+ /*
+  * Close the descriptor; on failure print a message naming the file and
+  * exit(1).
+  */
+ static void
+ close_or_die(int fd, const char *name)
+ {
+     if (close(fd) < 0)
+     {
+         fprintf(stderr, "failed to close `%s': %s\n", name, strerror(errno));
+         exit(1);
+     }
+ }
+ 
+ /*
+  * Build archive log file image of 16MB, from WAL segment buffer image of
+  * 8kB pages. And return size of archive log file image.
+  *
+  * Parameters:
+  *   from: WAL segment buffer page
+  *   to: Archive log file image
+  *
+  * Note: exit() will be called here when an error is detected. In the case of
+  * error, the control will not be given to the caller.
+  */
+ int
+ create_arch_image(const char *from, char *to)
+ {
+     const char *read_pos = from;
+     char       *write_pos = to;
+     const char *crrpage = from;
+     XLogPageHeader page = (XLogPageHeader)from;
+     XLogRecord *rec = NULL;
+     XLogRecord *write_rec = NULL;
+ 
+     /*
+      * Copy the first page header of the segment to the buffer.
+      * If the first record is the continuation of the last record of the
+      * former segment, copy its XLogContRecord too.
+      * XLogContRecord.xl_rem_len means the total data length of the
+      * continuation record, not the length of the record in the given page.
+      * Therefore, this value is not influenced by the change of the page size.
+      */
+     read_pos = crrpage = from;
+     memcpy(write_pos, read_pos, XLogPageHeaderSize(page));
+     write_pos += XLogPageHeaderSize(page);
+     read_pos += XLogPageHeaderSize(page);
+     if (page->xlp_info & XLP_FIRST_IS_CONTRECORD)
+     {
+         memcpy(write_pos, read_pos, SizeOfXLogContRecord);
+         write_pos += SizeOfXLogContRecord;
+     }
+ 
+     /*
+      * Loop page by page.
+      */
+     for (crrpage = from; crrpage < from + XLogSegSize; crrpage += XLOG_BLCKSZ)
+     {
+         XLogRecPtr  ptr;
+ 
+         /* Parse the page header. */
+         page = (XLogPageHeader)crrpage;
+         read_pos = crrpage + XLogPageHeaderSize(page);
+         ptr = page->xlp_pageaddr;
+ 
+         /* If there is continuation data, copy it to the write buffer. */
+         if (page->xlp_info & XLP_FIRST_IS_CONTRECORD)
+         {
+             int         cont_len = ((XLogContRecord *)read_pos)->xl_rem_len;
+             int         copy_len = cont_len;
+             int         free_len = XLOG_BLCKSZ -
+                 (read_pos + SizeOfXLogContRecord - crrpage);
+ 
+             /*
+              * Copy the continuation data within this page.
+              * xl_rem_len is the length of all the data still to come, so it
+              * may be larger than the space left in this page.
+              */
+             if (copy_len > free_len)
+                 copy_len = free_len;
+             memcpy(write_pos, read_pos + SizeOfXLogContRecord, copy_len);
+             read_pos += MAXALIGN(SizeOfXLogContRecord + copy_len);
+             write_pos += copy_len;
+             if (!rec)
+                 cont_log_size += copy_len;
+ 
+             /*
+              * If the data continues to the next page and no record header
+              * exists in this page, then switch to the next page.
+              */
+             if (cont_len != copy_len)
+                 continue;
+ 
+             /*
+              * Set the write position to the end of the current record,
+              * considering alignment.
+              */
+             write_pos = to + MAXALIGN(write_pos - to);
+ 
+             /*
+              * If the record has its header in this segment (it is not a
+              * continuation from the last segment), perform CRC check and
+              * check if the physical log can be removed.
+              */
+             if (write_rec)
+             {
+                 /* Check if the record is valid. */
+                 if (!is_valid_record(write_rec))
+                     exit(1);
+ 
+                 /*
+                  * Determine if the physical log can be removed.
+                  * If it can be removed, then rewind the position for the next
+                  * log record to the position of the physical log (plus padding).
+                  */
+                 if (remove_bkp_block(write_rec))
+                     write_pos = (char *)write_rec +
+                         MAXALIGN(SizeOfXLogRecord + write_rec->xl_len);
+             }
+         }
+ 
+         /* Read the data within the page record by record. */
+         while (read_pos <= crrpage + XLOG_BLCKSZ - SizeOfXLogRecord)
+         {
+             int         freespace = XLOG_BLCKSZ - (read_pos - crrpage);
+ 
+             /* Obtain the record header info. */
+             rec = (XLogRecord *)read_pos;
+             write_rec = (XLogRecord *)write_pos;
+ 
+             /*
+              * Reject impossible lengths before anything is copied.  With
+              * xl_tot_len shorter than the header plus the logical log,
+              * nothing (or too little) would be copied and the CRC check
+              * below would run over stale bytes of the output buffer.
+              */
+             if (rec->xl_tot_len < SizeOfXLogRecord + rec->xl_len)
+             {
+                 fprintf(stderr, "invalid record length.\n");
+                 exit(1);
+             }
+ 
+             logical_log_size += rec->xl_len;
+             physical_log_size +=
+                 rec->xl_tot_len - (SizeOfXLogRecord + rec->xl_len);
+             dumpXLogRecord(&ptr, read_pos - from, rec);
+ 
+             /*
+              * If the record continues to the following pages, copy only the portion
+              * in this page and then switch to the next page.
+              */
+             if (rec->xl_tot_len > freespace)
+             {
+                 /* Copy the log data only in the current page. */
+                 memcpy(write_pos, read_pos, freespace);
+                 /* read_pos is reset at the next page, so it is not updated here. */
+                 write_pos += freespace;
+                 break;
+             }
+ 
+             /* Copy the record data to the archive buffer. */
+             memcpy(write_pos, read_pos, rec->xl_tot_len);
+             read_pos += MAXALIGN(rec->xl_tot_len);
+             write_pos += MAXALIGN(rec->xl_tot_len);
+ 
+             /* Check if the record is valid using CRC in the record header. */
+             if (!is_valid_record(write_rec))
+                 exit(1);
+ 
+             /*
+              * Log record other than log switch must have its logical data.
+              * See the comment around the line 3065 of src/backend/access/transam/xlog.c
+              * (8.2.0).
+              */
+             if (IS_XLOG_SWITCH(write_rec))
+             {
+                 if (write_rec->xl_len != 0)
+                 {
+                     fprintf(stderr, "invalid xlog switch record.\n");
+                     exit(1);
+                 }
+             }
+             else if (write_rec->xl_len == 0)
+             {
+                 fprintf(stderr, "invalid record length.\n");
+                 exit(1);
+             }
+ 
+             /*
+              * If the log record is the log switch record, then no more log
+              * record exists in the input file. Exit.
+              */
+             if (IS_XLOG_SWITCH(write_rec))
+                 return write_pos - to;
+ 
+             /*
+              * If the physical log is removable, then rewind the position of
+              * the next record to the physical log start position (and padding).
+              */
+             if (remove_bkp_block(write_rec))
+                 write_pos = (char *)write_rec +
+                     MAXALIGN(SizeOfXLogRecord + write_rec->xl_len);
+             else
+                 write_pos = to + MAXALIGN(write_pos - to);
+ 
+         }
+     }
+ 
+     return (write_pos - to);
+ }
+ 
+ /*
+  * Remove the physical log which was marked `REMOVABLE'.
+  * Return true if the physical record has been removed, false otherwise.
+  *
+  * Only the record header is changed here (flags and CRC); the caller rewinds
+  * its write position so that the physical log bytes get overwritten.
+  */
+ static bool
+ remove_bkp_block(XLogRecord *record)
+ {
+     pg_crc32    crc;
+ 
+     /*
+      * If no record is specified or the physical log is not removable, just
+      * return.
+      */
+     if (!record || !IS_REMOVABLE(record))
+         return false;
+ 
+     /*
+      * Reset XLR_BKP_BLOCK_MASK.
+      * The flag that marks the physical log as removable is needed to restore
+      * the removed physical log with a dummy, so it is not reset.
+      */
+     record->xl_info &= ~XLR_BKP_BLOCK_MASK;
+ 
+     /*
+      * Record contents change by physical log removal and CRC has to be
+      * recalculated.
+      * CRC will be accumulated as follows:
+      *   1. Logical log
+      *   2. Physical log (It has been removed and we don't calculate its CRC here).
+      *   3. WAL record header excluding CRC part
+      * Please refer to the line 2817 of RecordIsValid(), src/backend/access/transam/xlog.c.
+      */
+     INIT_CRC32(crc);
+     COMP_CRC32(crc, XLogRecGetData(record), record->xl_len);
+     COMP_CRC32(crc, (char *)record + sizeof(pg_crc32),
+                SizeOfXLogRecord - sizeof(pg_crc32));
+     FIN_CRC32(crc);
+     record->xl_crc = crc;
+ 
+     return true;
+ }
+ 
+ /*
+  * Open the WAL segment file to archive and return file descriptor.
+  *
+  * The first argument of pg_compresslog will be regarded as an input file.
+  * If omitted or specified as "-", stdin will be used as an input file.
+  * The file is opened with PG_BINARY, like the output file, so the segment
+  * is read as raw bytes.
+  *
+  * Parameters:
+  *   argc: Number of arguments (argument to main() will be passed as is).
+  *   argv: Array of pointers to argument strings (argument to main() will be
+  *         passed as is).
+  *
+  * Note: exit() will be called here if error occurs. Will not return to the
+  * caller in this case.
+  */
+ static int
+ open_xlog_file(int argc, char *argv[])
+ {
+     int         from_fd = -1;
+ 
+     if (argc > 1 && strcmp(argv[1], "-") != 0)
+     {
+         /* Open WAL segment file to archive. */
+         from_fd = open(argv[1], O_RDONLY | PG_BINARY, 0);
+         if (from_fd < 0)
+         {
+             fprintf(stderr, "failed to open `%s': %s\n", argv[1],
+                     strerror(errno));
+             exit(1);
+         }
+ 
+         /* Obtain segment ID from the file name (for record dump). */
+         get_segment_id(argv[1]);
+     }
+     else
+         from_fd = fileno(stdin);
+ 
+     return from_fd;
+ }
+ 
+ /*
+  * Open the archive segment file to write the result and return file descriptor.
+  *
+  * The second argument to pg_compresslog will be regarded as an output file.
+  * If omitted or specified as "-", stdout will be used as an output file.
+  *
+  * Parameters:
+  *   argc: Number of arguments (argument to main() will be passed as is).
+  *   argv: Array of pointers to argument strings (argument to main() will be
+  *         passed as is).
+  *
+  * Note: When an error occurs within this function, exit() will be called here
+  * and will not return to the caller in this case.  O_EXCL makes the open fail
+  * when the output file already exists, so an existing file is never
+  * overwritten.
+  */
+ static int
+ open_arch_file(int argc, char *argv[])
+ {
+     int         to_fd = -1;
+ 
+     if (argc > 2 && strcmp(argv[2], "-") != 0)
+     {
+         /* Open the archive log file. */
+         to_fd = open(argv[2], O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                      S_IRUSR | S_IWUSR);
+         if (to_fd < 0)
+         {
+             fprintf(stderr, "failed to open `%s': %s\n", argv[2],
+                     strerror(errno));
+             exit(1);
+         }
+     }
+     else
+         to_fd = fileno(stdout);
+ 
+     return to_fd;
+ }
diff -Ncr postgresql-8.2.1.org/contrib/lesslog/pg_decompresslog.c postgresql-8.2.1/contrib/lesslog/pg_decompresslog.c
*** postgresql-8.2.1.org/contrib/lesslog/pg_decompresslog.c 1970-01-01 09:00:00.000000000 +0900
--- postgresql-8.2.1/contrib/lesslog/pg_decompresslog.c 2007-04-06 17:14:21.000000000 +0900
***************
*** 0 ****
--- 1,543 ----
+ /*
+  * file pg_decompresslog.c
+  *   Implementation of the archive restore command (pg_decompresslog).
+  */
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <string.h>
+ #include <sys/stat.h>
+ #include <unistd.h>
+ 
+ #include "postgres.h"
+ #include "access/xlog.h"
+ #include "access/xlog_internal.h"
+ #include "catalog/pg_control.h"
+ 
+ #include "file.h"
+ #include "debug.h"
+ 
+ /* =============================================================================
+  * Global variables
+  * ===========================================================================*/
+ 
+ /* Buffer to hold restored WAL segment file image. */
+ static char xlog_buff[XLogSegSize];
+ /* Buffer to read an archive log file. */
+ static char arch_buff[XLogSegSize];
+ /* Position to write a record data. */
+ static char *write_pos = xlog_buff;
+ /* Position to read record data in the archive log buffer. */
+ static char *read_pos = arch_buff;
+ /* This holds data in the first page header of the segment.
*/
+ static XLogPageHeaderData baseheader;
+ 
+ /* =============================================================================
+  * Prototype declaration
+  * ===========================================================================*/
+ static void print_usage(int code);
+ static void close_or_die(int fd, const char *name);
+ int create_wal_image(int arch_len);
+ static int write_record(char *record_buff, int rem_len, bool isFromPrevSeg);
+ static int get_freespace(void);
+ static void insert_XLogContRecord(char *write_pos, int rem_len);
+ static void insert_pageheader(char *write_pos, XLogPageHeader pheader,
+                               bool hasContRecord);
+ static int open_arch_file(int argc, char *argv[]);
+ static int open_xlog_file(int argc, char *argv[]);
+ 
+ /* =============================================================================
+  * Function definitions
+  * ===========================================================================*/
+ 
+ /*
+  * Entry point of pg_decompresslog command.
+  *
+  * Exits with 0 on success and 1 on any error.  Input whose first page header
+  * is not a valid WAL page header is copied to the output unchanged.  The
+  * output descriptor is closed explicitly on every path, so an error reported
+  * at close time makes the restore command fail.
+  */
+ int
+ main(int argc, char *argv[])
+ {
+     /* Names used in messages; the arguments may be absent (stdin/stdout). */
+     const char *from_name = (argc > 1) ? argv[1] : "stdin";
+     const char *to_name = (argc > 2) ? argv[2] : "stdout";
+     int         from_fd = -1;
+     int         to_fd = -1;
+     size_t      arch_len;
+ 
+     /* Error if argument(s) other than from and to are given. */
+     if (argc > 3)
+         print_usage(1);
+ 
+     /* Open the archive log file to restore. */
+     from_fd = open_arch_file(argc, argv);
+ 
+     /*
+      * Read all the data in the input archive log file.
+      * If the header at the first page is not valid, it is not a WAL segment file
+      * and then copy the whole input file to the output file.
+      */
+     arch_len = read_buff(from_fd, arch_buff, XLogSegSize);
+     if (!IS_WAL_FILE(arch_buff))
+     {
+         /*
+          * Write what was read for the header validation check and then copy
+          * the rest of the input file.
+          */
+         to_fd = open_xlog_file(argc, argv);
+         write_buff(to_fd, arch_buff, arch_len);
+         copy_file(from_fd, to_fd);
+         close_or_die(from_fd, from_name);
+         close_or_die(to_fd, to_name);
+         exit(0);
+     }
+     close_or_die(from_fd, from_name);
+ 
+     /*
+      * Build the restored WAL segment file image.
+      * Write all the restored WAL segment file image.
+      */
+     if (create_wal_image((int)arch_len))
+     {
+         fprintf(stderr, "failed to create the image of `%s'\n", from_name);
+         exit(1);
+     }
+     to_fd = open_xlog_file(argc, argv);
+     write_buff(to_fd, xlog_buff, XLogSegSize);
+     close_or_die(to_fd, to_name);
+ 
+     exit(0);
+ }
+ 
+ /*
+  * Show the usage of the command and exit with specified code.
+  * The text goes to stderr when the code reports a failure, stdout otherwise.
+  */
+ static void
+ print_usage(int code)
+ {
+     fprintf((code == 0) ? stdout : stderr,
+         "usage: pg_decompresslog [from [to]]\n"
+         "  from - Input file name (stdin if omitted or specified as '-')\n"
+         "  to - Output file name (stdout if omitted or specified as '-')\n"
+     );
+     exit(code);
+ }
+ 
+ /*
+  * Close the descriptor; on failure print a message naming the file and
+  * exit(1).
+  */
+ static void
+ close_or_die(int fd, const char *name)
+ {
+     if (close(fd) < 0)
+     {
+         fprintf(stderr, "failed to close `%s': %s\n", name, strerror(errno));
+         exit(1);
+     }
+ }
+ 
+ /*
+  * Restore 8KB page WAL segment file image from 16MB page archive log built by
+  * pg_compresslog command.
+  *
+  * Parameters:
+  *   arch_len: Size of the archive log file.
+  */
+ int
+ create_wal_image(int arch_len)
+ {
+     /* Buffer holding one record data. */
+     static char record_buff[XLogSegSize];
+     XLogPageHeader pheader;
+     XLogContRecord *pcontrec = NULL;
+     XLogRecord *precord = NULL;
+     char *rec_write_pos = record_buff;
+     int rec_len = 0;
+     bool isFromPrevSeg = false;
+ 
+     /*
+      * Copy the archive log file page header and hold info in the header.
+      * They are used to restore page headers of WAL segment file.
+ */ + pheader = (XLogPageHeader)arch_buff; + if (XLogPageHeaderSize(pheader) != SizeOfXLogLongPHD) + { + fprintf(stderr, "invalid pageheader size.\n"); + return -1; + } + memcpy(write_pos, (char *)pheader, SizeOfXLogLongPHD); + read_pos += SizeOfXLogLongPHD; + write_pos += SizeOfXLogLongPHD; + + baseheader.xlp_magic = pheader->xlp_magic; + baseheader.xlp_info &= ~XLP_ALL_FLAGS; + baseheader.xlp_tli = pheader->xlp_tli; + baseheader.xlp_pageaddr.xlogid = pheader->xlp_pageaddr.xlogid; + baseheader.xlp_pageaddr.xrecoff = pheader->xlp_pageaddr.xrecoff; + + /* + * Copy XLogContRecord and the continuous record to the record buffer, if there is + * a continuous record from the last segment file. Then move them to WAL segment + * file image buffer. + */ + if (pheader->xlp_info & XLP_FIRST_IS_CONTRECORD) + { + pcontrec = (XLogContRecord *)read_pos; + + /* If the size of the continue record is not valid, it's an error. */ + if (pcontrec->xl_rem_len == 0) + { + printf("invalid continue record length : xl_rem_len = %u\n", + pcontrec->xl_rem_len); + return -1; + } + + memcpy(write_pos, read_pos, SizeOfXLogContRecord); + write_pos += SizeOfXLogContRecord; + rec_len = pcontrec->xl_rem_len; + memcpy(rec_write_pos, (read_pos + SizeOfXLogContRecord), rec_len); + read_pos += MAXALIGN(SizeOfXLogContRecord + rec_len); + isFromPrevSeg = true; + + /* Write the continuous data to WAL segment file image buffer. */ + if (write_record(record_buff, rec_len, isFromPrevSeg)) + return 0; + } + isFromPrevSeg = false; + dump_page_header(((write_pos - xlog_buff) / XLOG_BLCKSZ), + (XLogPageHeader)xlog_buff); + + /* + * Loop record by record, and build each record image in the record buffer. + */ + while ((read_pos - arch_buff) < arch_len) + { + /* Set the write position of the record data. */ + rec_write_pos = record_buff; + precord = (XLogRecord *)read_pos; + + /* + * If the record data fits in the current segment, validate the record. 
+ * If WAL record cotinues to the next segment, we cannot calculate CRC for + * the whole record and skip the validation. + */ + if ((char *)precord - arch_buff + precord->xl_tot_len <= arch_len) + if (!is_valid_record(precord)) + exit(1); + + /* + * Record other than the log switch must have corresponding logical data. + * Refer to the comment around the line 3056 in src/backend/access/transam/xlog.c (8.2.0). + */ + if (IS_XLOG_SWITCH(precord)) + { + if (precord->xl_len != 0) + { + fprintf(stderr, "invalid xlog switch record.\n"); + exit(1); + } + } + else if (precord->xl_len == 0) + { + fprintf(stderr, "invalid record length.\n"); + exit(1); + } + + /* + * Copy the record header and the logical log to the record buffer. + * We don't move read_pos here to cauculate the alignment considering + * physical log. + */ + memcpy(rec_write_pos, read_pos, (SizeOfXLogRecord + precord->xl_len)); + rec_write_pos += (SizeOfXLogRecord + precord->xl_len); + rec_len = precord->xl_tot_len; + + /* Copy the physical log, or restore it. */ + if (precord->xl_tot_len > SizeOfXLogRecord + precord->xl_len) + { + /* + * If physical log does exist (not removed), then simply copy it. + * If physical log is removed, then build a dummy. + */ + if (precord->xl_info & XLR_BKP_BLOCK_MASK) + { + memcpy(rec_write_pos, + XLogRecGetData((XLogRecord *)read_pos) + precord->xl_len, + precord->xl_tot_len - (SizeOfXLogRecord + precord->xl_len)); + read_pos += MAXALIGN(precord->xl_tot_len); + } + else + { + /* + * Because full page write flag will be omitted during the archiving, CRC check + * should be performed only against the record header and the logical log. + * Therefore, we don't have to recalculate CRC value here. + */ + memset(rec_write_pos, '\0', + precord->xl_tot_len - (SizeOfXLogRecord + precord->xl_len)); + read_pos += MAXALIGN(SizeOfXLogRecord + precord->xl_len); + } + } + else + { + /* If theres not physical log in the original log, simply updates the input position. 
*/ + read_pos += MAXALIGN(precord->xl_tot_len); + } + + /* + * Write a record image to the WAL segment image buffer. Hey, the page size + * is again 8kB as the original WAL. + */ + if (write_record(record_buff, rec_len, isFromPrevSeg)) + break; + + } + + return 0; + } + + /* + * Build a record image to fit in 8kB page to WAL segment image buffer. + * + * Return value: + * 0: One record data build complete. + * 1: Hit the tail of the segment. + */ + static int + write_record(char *record_buff, int rem_len, bool isFromPrevSeg) + { + char *phead_pos = NULL; + char *rec_read_pos = record_buff; /* Read position in the record buffer. */ + char *rec_head_pos = NULL; + int freespace; /* Size of the free space in the page. */ + bool hasContRecord = isFromPrevSeg; + + /* + * Hold the position of the record header (or XLogContRecord). + * It is needed for an alignment. + */ + rec_head_pos = write_pos; /* Set the write position of the record data. */ + if (isFromPrevSeg) + rec_head_pos -= SizeOfXLogContRecord; + + freespace = get_freespace(); + + /* + * If free space size is the same as the page size, it means that the last record restoration + * filled the last page. So we add page header here. + * Because the record is complete in the last page, we don't need XLogContRecord. + * Page header at the top of the segment must have had written at the first call of this function. + * So we always add short format header here. + */ + if (freespace == XLOG_BLCKSZ) + { + phead_pos = write_pos; + insert_pageheader(write_pos, &baseheader, false); + write_pos += SizeOfXLogShortPHD; + freespace = (XLOG_BLCKSZ - SizeOfXLogShortPHD); + rec_head_pos = write_pos; + dump_page_header(((write_pos - xlog_buff) / XLOG_BLCKSZ), + (XLogPageHeader)phead_pos); + } + + /* + * Loop page by page. + */ + while(1) + { + /* + * If the record header does not fit the page, then insert a page header to the next + * page and copy the record data. 
+ */ + if (!hasContRecord && freespace < SizeOfXLogRecord) + { + write_pos += freespace; + } + else if (freespace < rem_len) + { + /* + * If the record data does not fit the page, fill this page with the former + * part of the record, copy the rest to the next page, insert a page header + * and XLogContRecord. + * the next page. + */ + memcpy(write_pos, rec_read_pos, freespace); + if (!hasContRecord) + dumpXLogRecord(&baseheader.xlp_pageaddr, + (size_t)(rec_head_pos - xlog_buff), + (XLogRecord *)rec_head_pos); + write_pos += freespace; + rec_read_pos += freespace; + rem_len -= freespace; + hasContRecord = true; + } + else + { + /* + * If th recor data fits to the page, copy the whole record data to the + * buffer and switch to the next record. + */ + int len; + memcpy(write_pos, rec_read_pos, rem_len); + if (!hasContRecord) + dumpXLogRecord(&baseheader.xlp_pageaddr, + (size_t)(rec_head_pos - xlog_buff), + (XLogRecord *)rec_head_pos); + + /* + * Alignment handling. + * Alignment has to be adjusted for each record. + */ + if (hasContRecord) + len = MAXALIGN(SizeOfXLogContRecord + rem_len); + else + len = MAXALIGN(rem_len); + write_pos = rec_head_pos + len; + hasContRecord = false; + + break; + } + + /* + * Insert a page header. + * If the start of the page is a continuous data from the last page, + * insert XLogContRecor too. + */ + if ((write_pos - xlog_buff) >= XLogSegSize) + return 1; + phead_pos = write_pos; + insert_pageheader(write_pos, &baseheader, hasContRecord); + write_pos += SizeOfXLogShortPHD; + freespace = (XLOG_BLCKSZ - SizeOfXLogShortPHD); + rec_head_pos = write_pos; + if (hasContRecord) + { + insert_XLogContRecord(write_pos, rem_len); + write_pos += SizeOfXLogContRecord; + freespace -= SizeOfXLogContRecord; + } + dump_page_header(((write_pos - xlog_buff) / XLOG_BLCKSZ), + (XLogPageHeader)phead_pos); + } + return 0; + } + + /* + * Calculate free space size of the page. 
+ */ + static int + get_freespace(void) + { + return XLOG_BLCKSZ - (write_pos - xlog_buff) % XLOG_BLCKSZ; + } + + /* + * Insert a XLogContRecord to the buffer. + * + * Parameters: + * write_pos: Write position in the buffer. + * rem_len: Length of the remaining record which continues from the last + * page. + */ + static void + insert_XLogContRecord(char *write_pos, int rem_len) + { + XLogContRecord contrec; + + contrec.xl_rem_len = rem_len; + memcpy(write_pos, (char *)&contrec, SizeOfXLogContRecord); + } + + /* + * Insert a page header to the buffer. + * + * Parameters: + * write_pos: Write position in the buffer. + * pheader: Pointer to the structure holding header info at the firt page of + * the segment. + * hasContRecord: Flag to indicate a continuous record from the last page. + */ + static void + insert_pageheader(char *write_pos, XLogPageHeader pheader, bool hasContRecord) + { + /* + * Each page header is restored using the page header at the first page of + * the WAL segment. Magic number (xlp_magic), timeline id (xlp_tli) and + * XLOGID (xlogid) should no change within a segment and they are copied + * from the first page header. Continuous data (xlp_info) depends on the + * record of a given page. + * xrecoff is calculated by adding XLOG_BLKSZ to xrecoff value in the first + * page header. + */ + pheader->xlp_info &= ~XLP_ALL_FLAGS; + if (hasContRecord) + pheader->xlp_info |= XLP_FIRST_IS_CONTRECORD; + pheader->xlp_pageaddr.xrecoff += XLOG_BLCKSZ; + memcpy(write_pos, (char *)pheader, SizeOfXLogShortPHD); + } + + /* + * Open thie archive log file to be restored and return file descriptor. + * + * The first argument of the command is an input file name. + * If omitted or specified as "-", stdin will be used as an input file. + * + * Parameters: + * argc: This is one of the argument to the command, number of the arguments. + * argv: This is one of the argument to the command, a pointer arry to the + * argument list. 
+ * + * Note: If error occurs within this function, whole command will exit here + * using exit() and the caller will not have any chance to take care of errors. + */ + static int + open_arch_file(int argc, char *argv[]) + { + int from_fd = -1; + + if (argc > 1 && strcmp(argv[1], "-") != 0) + { + /* Open archive log file to restore. */ + from_fd = open(argv[1], O_RDONLY, 0); + if (from_fd < 0) + { + fprintf(stderr, "failed to open `%s': %s\n", argv[1], + strerror(errno)); + exit(1); + } + + /* Obtain the segment ID from the file name (for record dump). */ + get_segment_id(argv[1]); + } + else + from_fd = fileno(stdin); + + return from_fd; + } + + /* + * Open the WAL segment file to write the restored data and return file + * descriptor. + * + * The secomd argument to the command will be regarded as an output file. + * If omitted or specified as "-", stdout will be used as the output file. + * + * Parameters: + * argc: This is one of the argument to the command, number of the arguments. + * argv: This is one of the argument to the command, a pointer arry to the + * argument list. + * + * Note: If error occurs within this function, whole command will exit here + * using exit() and the caller will not have any chance to take care of errors. + */ + static int + open_xlog_file(int argc, char *argv[]) + { + int to_fd = -1; + + if (argc > 2 && strcmp(argv[2], "-") != 0) + { + /* Open the WAL segment file */ + to_fd = open(argv[2], O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); + if (to_fd < 0) + { + fprintf(stderr, "failed to open `%s': %s\n", argv[2], + strerror(errno)); + exit(1); + } + } + else + to_fd = fileno(stdout); + + return to_fd; + }