diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index be3adf1..4ec485e 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -64,10 +64,14 @@ /* ---------- * Paths for the statistics files (relative to installation's $PGDATA). + * Permanent and temprorary, global and per-database files. * ---------- */ -#define PGSTAT_STAT_PERMANENT_FILENAME "global/pgstat.stat" -#define PGSTAT_STAT_PERMANENT_TMPFILE "global/pgstat.tmp" +#define PGSTAT_STAT_PERMANENT_DIRECTORY "stat" +#define PGSTAT_STAT_PERMANENT_FILENAME "stat/global.stat" +#define PGSTAT_STAT_PERMANENT_TMPFILE "stat/global.tmp" +#define PGSTAT_STAT_PERMANENT_DB_FILENAME "stat/%d.stat" +#define PGSTAT_STAT_PERMANENT_DB_TMPFILE "stat/%d.tmp" /* ---------- * Timer definitions. @@ -115,8 +119,11 @@ int pgstat_track_activity_query_size = 1024; * Built from GUC parameter * ---------- */ +char *pgstat_stat_directory = NULL; char *pgstat_stat_filename = NULL; char *pgstat_stat_tmpname = NULL; +char *pgstat_stat_db_filename = NULL; +char *pgstat_stat_db_tmpname = NULL; /* * BgWriter global statistics counters (unused in other processes). @@ -219,11 +226,16 @@ static int localNumBackends = 0; */ static PgStat_GlobalStats globalStats; -/* Last time the collector successfully wrote the stats file */ -static TimestampTz last_statwrite; +/* Write request info for each database */ +typedef struct DBWriteRequest +{ + Oid databaseid; /* OID of the database to write */ + TimestampTz request_time; /* timestamp of the last write request */ +} DBWriteRequest; -/* Latest statistics request time from backends */ -static TimestampTz last_statrequest; +/* Latest statistics request time from backends for each DB */ +static DBWriteRequest * last_statrequests = NULL; +static int num_statrequests = 0; static volatile bool need_exit = false; static volatile bool got_SIGHUP = false; @@ -252,11 +264,17 @@ static void pgstat_sighup_handler(SIGNAL_ARGS); static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create); static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create); -static void pgstat_write_statsfile(bool permanent); -static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent); +static void pgstat_write_statsfile(bool permanent, bool force); +static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent); +static void pgstat_write_db_dummyfile(Oid databaseid); +static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs); +static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent); static void backend_read_statsfile(void); static void pgstat_read_current_status(void); +static bool pgstat_write_statsfile_needed(); +static bool pgstat_db_requested(Oid databaseid); + static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); static void pgstat_send_funcstats(void); static HTAB *pgstat_collect_oids(Oid catalogid); @@ -285,7 +303,6 @@ static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len); - /* ------------------------------------------------------------ * Public functions called from postmaster follow * ------------------------------------------------------------ @@ -549,8 +566,34 @@ startup_failed: void pgstat_reset_all(void) { - unlink(pgstat_stat_filename); - unlink(PGSTAT_STAT_PERMANENT_FILENAME); + DIR * dir; + struct dirent * entry; + + dir = AllocateDir(pgstat_stat_directory); + while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL) + { + char fname[strlen(pgstat_stat_directory) + strlen(entry->d_name) + 1]; + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + sprintf(fname, "%s/%s", pgstat_stat_directory, entry->d_name); + unlink(fname); + } + FreeDir(dir); + + dir = AllocateDir(PGSTAT_STAT_PERMANENT_DIRECTORY); + while ((entry = ReadDir(dir, PGSTAT_STAT_PERMANENT_DIRECTORY)) != NULL) + { + char fname[strlen(PGSTAT_STAT_PERMANENT_FILENAME) + strlen(entry->d_name) + 1]; + + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) + continue; + + sprintf(fname, "%s/%s", PGSTAT_STAT_PERMANENT_FILENAME, entry->d_name); + unlink(fname); + } + FreeDir(dir); } #ifdef EXEC_BACKEND @@ -1408,13 +1451,14 @@ pgstat_ping(void) * ---------- */ static void -pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time) +pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid) { PgStat_MsgInquiry msg; pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY); msg.clock_time = clock_time; msg.cutoff_time = cutoff_time; + msg.databaseid = databaseid; pgstat_send(&msg, sizeof(msg)); } @@ -3004,6 +3048,7 @@ PgstatCollectorMain(int argc, char *argv[]) int len; PgStat_Msg msg; int wr; + bool first_write = true; IsUnderPostmaster = true; /* we are a postmaster subprocess now */ @@ -3053,17 +3098,11 @@ PgstatCollectorMain(int argc, char *argv[]) init_ps_display("stats collector process", "", "", ""); /* - * Arrange to write the initial status file right away - */ - last_statrequest = GetCurrentTimestamp(); - last_statwrite = last_statrequest - 1; - - /* * Read in an existing statistics stats file or initialize the stats to - * zero. + * zero (read data for all databases, including table/func stats). */ pgStatRunningInCollector = true; - pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); + pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false); /* * Loop to process messages until we get SIGQUIT or detect ungraceful @@ -3107,10 +3146,14 @@ PgstatCollectorMain(int argc, char *argv[]) /* * Write the stats file if a new request has arrived that is not - * satisfied by existing file. + * satisfied by existing file (force writing all files if it's + * the first write after startup). */ - if (last_statwrite < last_statrequest) - pgstat_write_statsfile(false); + if (first_write || pgstat_write_statsfile_needed()) + { + pgstat_write_statsfile(false, first_write); + first_write = false; + } /* * Try to receive and process a message. This will not block, @@ -3269,7 +3312,7 @@ PgstatCollectorMain(int argc, char *argv[]) /* * Save the final stats to reuse at next startup. */ - pgstat_write_statsfile(true); + pgstat_write_statsfile(true, true); exit(0); } @@ -3429,23 +3472,25 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create) * shutting down only), remove the temporary file so that backends * starting up under a new postmaster can't read the old data before * the new collector is ready. + * + * When the 'force' is false, only the requested databases (listed in + * last_statrequests) will be written. If 'force' is true, all databases + * will be written (this is used e.g. at shutdown). * ---------- */ static void -pgstat_write_statsfile(bool permanent) +pgstat_write_statsfile(bool permanent, bool force) { HASH_SEQ_STATUS hstat; - HASH_SEQ_STATUS tstat; - HASH_SEQ_STATUS fstat; PgStat_StatDBEntry *dbentry; - PgStat_StatTabEntry *tabentry; - PgStat_StatFuncEntry *funcentry; FILE *fpout; int32 format_id; const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname; const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; int rc; + elog(DEBUG1, "writing statsfile '%s'", statfile); + /* * Open the statistics temp file to write out the current values. */ @@ -3484,6 +3529,20 @@ pgstat_write_statsfile(bool permanent) while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL) { /* + * Write our the tables and functions into a separate file, but only + * if the database is in the requests or if it's a forced write (then + * all the DBs need to be written - e.g. at the shutdown). + * + * We need to do this before the dbentry write to write the proper + * timestamp to the global file. + */ + if (force || pgstat_db_requested(dbentry->databaseid)) { + elog(DEBUG1, "writing statsfile for DB %d", dbentry->databaseid); + dbentry->stats_timestamp = globalStats.stats_timestamp; + pgstat_write_db_statsfile(dbentry, permanent); + } + + /* * Write out the DB entry including the number of live backends. We * don't write the tables or functions pointers, since they're of no * use to any other process. @@ -3493,29 +3552,10 @@ pgstat_write_statsfile(bool permanent) (void) rc; /* we'll check for error with ferror */ /* - * Walk through the database's access stats per table. - */ - hash_seq_init(&tstat, dbentry->tables); - while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL) - { - fputc('T', fpout); - rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout); - (void) rc; /* we'll check for error with ferror */ - } - - /* - * Walk through the database's function stats table. - */ - hash_seq_init(&fstat, dbentry->functions); - while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL) - { - fputc('F', fpout); - rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout); - (void) rc; /* we'll check for error with ferror */ - } - - /* * Mark the end of this DB + * + * TODO Does using these chars still make sense, when the tables/func + * stats are moved to a separate file? */ fputc('d', fpout); } @@ -3527,6 +3567,28 @@ pgstat_write_statsfile(bool permanent) */ fputc('E', fpout); + /* In any case, we can just throw away all the db requests, but we need to + * write dummy files for databases without a stat entry (it would cause + * issues in pgstat_read_db_statsfile_timestamp and pgstat wait timeouts). + * This may happend e.g. for shared DB (oid = 0) right after initdb. + */ + if (last_statrequests != NULL) + { + int i = 0; + for (i = 0; i < num_statrequests; i++) + { + /* Create dummy files for requested databases without a proper + * dbentry. It's much easier this way than dealing with multiple + * timestamps, possibly existing but not yet written DBs etc. */ + if (! pgstat_get_db_entry(last_statrequests[i].databaseid, false)) + pgstat_write_db_dummyfile(last_statrequests[i].databaseid); + } + + pfree(last_statrequests); + last_statrequests = NULL; + num_statrequests = 0; + } + if (ferror(fpout)) { ereport(LOG, @@ -3552,57 +3614,247 @@ pgstat_write_statsfile(bool permanent) tmpfile, statfile))); unlink(tmpfile); } - else + + if (permanent) + unlink(pgstat_stat_filename); +} + + +/* ---------- + * pgstat_write_db_statsfile() - + * + * Tell the news. This writes stats file for a single database. + * + * If writing to the permanent file (happens when the collector is + * shutting down only), remove the temporary file so that backends + * starting up under a new postmaster can't read the old data before + * the new collector is ready. + * ---------- + */ +static void +pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent) +{ + HASH_SEQ_STATUS tstat; + HASH_SEQ_STATUS fstat; + PgStat_StatTabEntry *tabentry; + PgStat_StatFuncEntry *funcentry; + FILE *fpout; + int32 format_id; + const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_DB_TMPFILE : pgstat_stat_db_tmpname; + const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename; + int rc; + + /* + * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte + */ + char db_tmpfile[strlen(tmpfile) + 11]; + char db_statfile[strlen(statfile) + 11]; + + /* + * Append database OID at the end of the basic filename (both for tmp and target file). + */ + snprintf(db_tmpfile, strlen(tmpfile) + 11, tmpfile, dbentry->databaseid); + snprintf(db_statfile, strlen(statfile) + 11, statfile, dbentry->databaseid); + + elog(DEBUG1, "writing statsfile '%s'", db_statfile); + + /* + * Open the statistics temp file to write out the current values. + */ + fpout = AllocateFile(db_tmpfile, PG_BINARY_W); + if (fpout == NULL) { - /* - * Successful write, so update last_statwrite. - */ - last_statwrite = globalStats.stats_timestamp; + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open temporary statistics file \"%s\": %m", + db_tmpfile))); + return; + } - /* - * If there is clock skew between backends and the collector, we could - * receive a stats request time that's in the future. If so, complain - * and reset last_statrequest. Resetting ensures that no inquiry - * message can cause more than one stats file write to occur. - */ - if (last_statrequest > last_statwrite) - { - char *reqtime; - char *mytime; - - /* Copy because timestamptz_to_str returns a static buffer */ - reqtime = pstrdup(timestamptz_to_str(last_statrequest)); - mytime = pstrdup(timestamptz_to_str(last_statwrite)); - elog(LOG, "last_statrequest %s is later than collector's time %s", - reqtime, mytime); - pfree(reqtime); - pfree(mytime); - - last_statrequest = last_statwrite; - } + /* + * Write the file header --- currently just a format ID. + */ + format_id = PGSTAT_FILE_FORMAT_ID; + rc = fwrite(&format_id, sizeof(format_id), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + + /* + * Write the timestamp. + */ + rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + + /* + * Walk through the database's access stats per table. + */ + hash_seq_init(&tstat, dbentry->tables); + while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL) + { + fputc('T', fpout); + rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout); + (void) rc; /* we'll check for error with ferror */ } + /* + * Walk through the database's function stats table. + */ + hash_seq_init(&fstat, dbentry->functions); + while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL) + { + fputc('F', fpout); + rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + } + + /* + * No more output to be done. Close the temp file and replace the old + * pgstat.stat with it. The ferror() check replaces testing for error + * after each individual fputc or fwrite above. + */ + fputc('E', fpout); + + if (ferror(fpout)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write temporary statistics file \"%s\": %m", + db_tmpfile))); + FreeFile(fpout); + unlink(db_tmpfile); + } + else if (FreeFile(fpout) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close temporary statistics file \"%s\": %m", + db_tmpfile))); + unlink(db_tmpfile); + } + else if (rename(db_tmpfile, db_statfile) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m", + db_tmpfile, db_statfile))); + unlink(db_tmpfile); + } + if (permanent) - unlink(pgstat_stat_filename); + { + char db_statfile[strlen(pgstat_stat_db_filename) + 11]; + snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11, + pgstat_stat_db_filename, dbentry->databaseid); + elog(DEBUG1, "removing temporary stat file '%s'", db_statfile); + unlink(db_statfile); + } } /* ---------- + * pgstat_write_db_dummyfile() - + * + * All this does is writing a dummy stat file for databases without dbentry + * yet. It basically writes just a file header - format ID and a timestamp. + * ---------- + */ +static void +pgstat_write_db_dummyfile(Oid databaseid) +{ + FILE *fpout; + int32 format_id; + int rc; + + /* + * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte + */ + char db_tmpfile[strlen(pgstat_stat_db_tmpname) + 11]; + char db_statfile[strlen(pgstat_stat_db_filename) + 11]; + + /* + * Append database OID at the end of the basic filename (both for tmp and target file). + */ + snprintf(db_tmpfile, strlen(pgstat_stat_db_tmpname) + 11, pgstat_stat_db_tmpname, databaseid); + snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11, pgstat_stat_db_filename, databaseid); + + elog(DEBUG1, "writing statsfile '%s'", db_statfile); + + /* + * Open the statistics temp file to write out the current values. + */ + fpout = AllocateFile(db_tmpfile, PG_BINARY_W); + if (fpout == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open temporary statistics file \"%s\": %m", + db_tmpfile))); + return; + } + + /* + * Write the file header --- currently just a format ID. + */ + format_id = PGSTAT_FILE_FORMAT_ID; + rc = fwrite(&format_id, sizeof(format_id), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + + /* + * Write the timestamp. + */ + rc = fwrite(&(globalStats.stats_timestamp), sizeof(globalStats.stats_timestamp), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + + /* + * No more output to be done. Close the temp file and replace the old + * pgstat.stat with it. The ferror() check replaces testing for error + * after each individual fputc or fwrite above. + */ + fputc('E', fpout); + + if (ferror(fpout)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write temporary dummy statistics file \"%s\": %m", + db_tmpfile))); + FreeFile(fpout); + unlink(db_tmpfile); + } + else if (FreeFile(fpout) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close temporary dummy statistics file \"%s\": %m", + db_tmpfile))); + unlink(db_tmpfile); + } + else if (rename(db_tmpfile, db_statfile) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not rename temporary dummy statistics file \"%s\" to \"%s\": %m", + db_tmpfile, db_statfile))); + unlink(db_tmpfile); + } + +} + +/* ---------- * pgstat_read_statsfile() - * * Reads in an existing statistics collector file and initializes the * databases' hash table (whose entries point to the tables' hash tables). + * + * Allows reading only the global stats (at database level), which is just + * enough for many purposes (e.g. autovacuum launcher etc.). If this is + * sufficient for you, use onlydbs=true. * ---------- */ static HTAB * -pgstat_read_statsfile(Oid onlydb, bool permanent) +pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs) { PgStat_StatDBEntry *dbentry; PgStat_StatDBEntry dbbuf; - PgStat_StatTabEntry *tabentry; - PgStat_StatTabEntry tabbuf; - PgStat_StatFuncEntry funcbuf; - PgStat_StatFuncEntry *funcentry; HASHCTL hash_ctl; HTAB *dbhash; HTAB *tabhash = NULL; @@ -3613,6 +3865,11 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; /* + * If we want a db-level stats only, we don't want a particular db. + */ + Assert(!((onlydb != InvalidOid) && onlydbs)); + + /* * The tables will live in pgStatLocalContext. */ pgstat_setup_memcxt(); @@ -3758,6 +4015,16 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) */ tabhash = dbentry->tables; funchash = dbentry->functions; + + /* + * Read the data from the file for this database. If there was + * onlydb specified (!= InvalidOid), we would not get here because + * of a break above. So we don't need to recheck. + */ + if (! onlydbs) + pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash, + permanent); + break; /* @@ -3768,6 +4035,105 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) funchash = NULL; break; + case 'E': + goto done; + + default: + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", + statfile))); + goto done; + } + } + +done: + FreeFile(fpin); + + if (permanent) + unlink(PGSTAT_STAT_PERMANENT_FILENAME); + + return dbhash; +} + + +/* ---------- + * pgstat_read_db_statsfile() - + * + * Reads in an existing statistics collector db file and initializes the + * tables and functions hash tables (for the database identified by Oid). + * ---------- + */ +static void +pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent) +{ + PgStat_StatTabEntry *tabentry; + PgStat_StatTabEntry tabbuf; + PgStat_StatFuncEntry funcbuf; + PgStat_StatFuncEntry *funcentry; + FILE *fpin; + int32 format_id; + TimestampTz timestamp; + bool found; + const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename; + + /* + * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 byte + */ + char db_statfile[strlen(statfile) + 11]; + + /* + * Append database OID at the end of the basic filename (both for tmp and target file). + */ + snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid); + + /* + * Try to open the status file. If it doesn't exist, the backends simply + * return zero for anything and the collector simply starts from scratch + * with empty counters. + * + * ENOENT is a possibility if the stats collector is not running or has + * not yet written the stats file the first time. Any other failure + * condition is suspicious. + */ + if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL) + { + if (errno != ENOENT) + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errcode_for_file_access(), + errmsg("could not open statistics file \"%s\": %m", + db_statfile))); + return; + } + + /* + * Verify it's of the expected format. + */ + if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id) + || format_id != PGSTAT_FILE_FORMAT_ID) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", db_statfile))); + goto done; + } + + /* + * Read global stats struct + */ + if (fread(×tamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp)) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", db_statfile))); + goto done; + } + + /* + * We found an existing collector stats file. Read it and put all the + * hashtable entries into place. + */ + for (;;) + { + switch (fgetc(fpin)) + { /* * 'T' A PgStat_StatTabEntry follows. */ @@ -3777,7 +4143,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) { ereport(pgStatRunningInCollector ? LOG : WARNING, (errmsg("corrupted statistics file \"%s\"", - statfile))); + db_statfile))); goto done; } @@ -3795,7 +4161,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) { ereport(pgStatRunningInCollector ? LOG : WARNING, (errmsg("corrupted statistics file \"%s\"", - statfile))); + db_statfile))); goto done; } @@ -3811,7 +4177,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) { ereport(pgStatRunningInCollector ? LOG : WARNING, (errmsg("corrupted statistics file \"%s\"", - statfile))); + db_statfile))); goto done; } @@ -3829,7 +4195,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) { ereport(pgStatRunningInCollector ? LOG : WARNING, (errmsg("corrupted statistics file \"%s\"", - statfile))); + db_statfile))); goto done; } @@ -3845,7 +4211,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) default: ereport(pgStatRunningInCollector ? LOG : WARNING, (errmsg("corrupted statistics file \"%s\"", - statfile))); + db_statfile))); goto done; } } @@ -3854,37 +4220,47 @@ done: FreeFile(fpin); if (permanent) - unlink(PGSTAT_STAT_PERMANENT_FILENAME); + { + char db_statfile[strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) + 11]; + snprintf(db_statfile, strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) + 11, + PGSTAT_STAT_PERMANENT_DB_FILENAME, databaseid); + elog(DEBUG1, "removing permanent stats file '%s'", db_statfile); + unlink(db_statfile); + } - return dbhash; + return; } /* ---------- - * pgstat_read_statsfile_timestamp() - + * pgstat_read_db_statsfile_timestamp() - * - * Attempt to fetch the timestamp of an existing stats file. + * Attempt to fetch the timestamp of an existing stats file (for a DB). * Returns TRUE if successful (timestamp is stored at *ts). * ---------- */ static bool -pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts) +pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz *ts) { - PgStat_GlobalStats myGlobalStats; + TimestampTz timestamp; FILE *fpin; int32 format_id; - const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; + const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : pgstat_stat_db_filename; + char db_statfile[strlen(statfile) + 11]; + + /* format the db statfile filename */ + snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid); /* * Try to open the status file. As above, anything but ENOENT is worthy * of complaining about. */ - if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) + if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL) { if (errno != ENOENT) ereport(pgStatRunningInCollector ? LOG : WARNING, (errcode_for_file_access(), errmsg("could not open statistics file \"%s\": %m", - statfile))); + db_statfile))); return false; } @@ -3895,7 +4271,7 @@ pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts) || format_id != PGSTAT_FILE_FORMAT_ID) { ereport(pgStatRunningInCollector ? LOG : WARNING, - (errmsg("corrupted statistics file \"%s\"", statfile))); + (errmsg("corrupted statistics file \"%s\"", db_statfile))); FreeFile(fpin); return false; } @@ -3903,15 +4279,15 @@ pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts) /* * Read global stats struct */ - if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != sizeof(myGlobalStats)) + if (fread(×tamp, 1, sizeof(TimestampTz), fpin) != sizeof(TimestampTz)) { ereport(pgStatRunningInCollector ? LOG : WARNING, - (errmsg("corrupted statistics file \"%s\"", statfile))); + (errmsg("corrupted statistics file \"%s\"", db_statfile))); FreeFile(fpin); return false; } - *ts = myGlobalStats.stats_timestamp; + *ts = timestamp; FreeFile(fpin); return true; @@ -3947,7 +4323,7 @@ backend_read_statsfile(void) CHECK_FOR_INTERRUPTS(); - ok = pgstat_read_statsfile_timestamp(false, &file_ts); + ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, &file_ts); cur_ts = GetCurrentTimestamp(); /* Calculate min acceptable timestamp, if we didn't already */ @@ -4006,7 +4382,7 @@ backend_read_statsfile(void) pfree(mytime); } - pgstat_send_inquiry(cur_ts, min_ts); + pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId); break; } @@ -4016,7 +4392,7 @@ backend_read_statsfile(void) /* Not there or too old, so kick the collector and wait a bit */ if ((count % PGSTAT_INQ_LOOP_COUNT) == 0) - pgstat_send_inquiry(cur_ts, min_ts); + pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId); pg_usleep(PGSTAT_RETRY_DELAY * 1000L); } @@ -4026,9 +4402,16 @@ backend_read_statsfile(void) /* Autovacuum launcher wants stats about all databases */ if (IsAutoVacuumLauncherProcess()) - pgStatDBHash = pgstat_read_statsfile(InvalidOid, false); + /* + * FIXME Does it really need info including tables/functions? Or is it enough to read + * database-level stats? It seems to me the launcher needs PgStat_StatDBEntry only + * (at least that's how I understand the rebuild_database_list() in autovacuum.c), + * because pgstat_stattabentries are used in do_autovacuum() only, that that's what's + * executed in workers ... So maybe we'd be just fine by reading in the dbentries? + */ + pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true); else - pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false); + pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, false); } @@ -4084,44 +4467,84 @@ pgstat_clear_snapshot(void) static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len) { - /* - * Advance last_statrequest if this requestor has a newer cutoff time - * than any previous request. - */ - if (msg->cutoff_time > last_statrequest) - last_statrequest = msg->cutoff_time; + int i = 0; + bool found = false; + PgStat_StatDBEntry *dbentry; + + elog(DEBUG1, "received inquiry for %d", msg->databaseid); /* - * If the requestor's local clock time is older than last_statwrite, we - * should suspect a clock glitch, ie system time going backwards; though - * the more likely explanation is just delayed message receipt. It is - * worth expending a GetCurrentTimestamp call to be sure, since a large - * retreat in the system clock reading could otherwise cause us to neglect - * to update the stats file for a long time. + * Find the last write request for this DB (found=true in that case). Plain + * linear search, not really worth doing any magic here (probably). */ - if (msg->clock_time < last_statwrite) + for (i = 0; i < num_statrequests; i++) + { + if (last_statrequests[i].databaseid == msg->databaseid) + { + found = true; + break; + } + } + + if (found) + { + /* + * There already is a request for this DB, so lets advance the + * request time if this requestor has a newer cutoff time + * than any previous request. + */ + if (msg->cutoff_time > last_statrequests[i].request_time) + last_statrequests[i].request_time = msg->cutoff_time; + } + else { - TimestampTz cur_ts = GetCurrentTimestamp(); + /* + * There's no request for this DB yet, so lets create it (allocate a + * space for it, set the values). + */ + if (last_statrequests == NULL) + last_statrequests = palloc(sizeof(DBWriteRequest)); + else + last_statrequests = repalloc(last_statrequests, + (num_statrequests + 1)*sizeof(DBWriteRequest)); + + last_statrequests[num_statrequests].databaseid = msg->databaseid; + last_statrequests[num_statrequests].request_time = msg->clock_time; + num_statrequests += 1; - if (cur_ts < last_statwrite) + /* + * If the requestor's local clock time is older than last_statwrite, we + * should suspect a clock glitch, ie system time going backwards; though + * the more likely explanation is just delayed message receipt. It is + * worth expending a GetCurrentTimestamp call to be sure, since a large + * retreat in the system clock reading could otherwise cause us to neglect + * to update the stats file for a long time. + */ + dbentry = pgstat_get_db_entry(msg->databaseid, false); + if ((dbentry != NULL) && (msg->clock_time < dbentry->stats_timestamp)) { - /* - * Sure enough, time went backwards. Force a new stats file write - * to get back in sync; but first, log a complaint. - */ - char *writetime; - char *mytime; - - /* Copy because timestamptz_to_str returns a static buffer */ - writetime = pstrdup(timestamptz_to_str(last_statwrite)); - mytime = pstrdup(timestamptz_to_str(cur_ts)); - elog(LOG, "last_statwrite %s is later than collector's time %s", - writetime, mytime); - pfree(writetime); - pfree(mytime); - - last_statrequest = cur_ts; - last_statwrite = last_statrequest - 1; + TimestampTz cur_ts = GetCurrentTimestamp(); + + if (cur_ts < dbentry->stats_timestamp) + { + /* + * Sure enough, time went backwards. Force a new stats file write + * to get back in sync; but first, log a complaint. + */ + char *writetime; + char *mytime; + + /* Copy because timestamptz_to_str returns a static buffer */ + writetime = pstrdup(timestamptz_to_str(dbentry->stats_timestamp)); + mytime = pstrdup(timestamptz_to_str(cur_ts)); + elog(LOG, "last_statwrite %s is later than collector's time %s for " + "db %d", writetime, mytime, dbentry->databaseid); + pfree(writetime); + pfree(mytime); + + last_statrequests[num_statrequests].request_time = cur_ts; + dbentry->stats_timestamp = cur_ts - 1; + } } } } @@ -4278,10 +4701,17 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len) dbentry = pgstat_get_db_entry(msg->m_databaseid, false); /* - * If found, remove it. + * If found, remove it (along with the db statfile). */ if (dbentry) { + char db_statfile[strlen(pgstat_stat_db_filename) + 11]; + snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11, + pgstat_stat_filename, dbentry->databaseid); + + elog(DEBUG1, "removing %s", db_statfile); + unlink(db_statfile); + if (dbentry->tables != NULL) hash_destroy(dbentry->tables); if (dbentry->functions != NULL) @@ -4687,3 +5117,58 @@ pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len) HASH_REMOVE, NULL); } } + +/* ---------- + * pgstat_write_statsfile_needed() - + * + * Checks whether there's a db stats request, requiring a file write. + * + * TODO Seems that thanks the way we handle last_statrequests (erase after + * a write), this is unnecessary. Just check that there's at least one + * request and you're done. Although there might be delayed requests ... + * ---------- + */ + +static bool pgstat_write_statsfile_needed() +{ + int i = 0; + PgStat_StatDBEntry *dbentry; + + /* Check the databases if they need to refresh the stats. */ + for (i = 0; i < num_statrequests; i++) + { + dbentry = pgstat_get_db_entry(last_statrequests[i].databaseid, false); + + /* No dbentry yet or too old. */ + if ((! dbentry) || + (dbentry->stats_timestamp < last_statrequests[i].request_time)) { + return true; + } + + } + + /* Well, everything was written recently ... */ + return false; +} + +/* ---------- + * pgstat_write_statsfile_needed() - + * + * Checks whether stats for a particular DB need to be written to a file). + * ---------- + */ + +static bool +pgstat_db_requested(Oid databaseid) +{ + int i = 0; + + /* Check the databases if they need to refresh the stats. */ + for (i = 0; i < num_statrequests; i++) + { + if (last_statrequests[i].databaseid == databaseid) + return true; + } + + return false; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2cf34ce..e3e432b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -8730,20 +8730,43 @@ static void assign_pgstat_temp_directory(const char *newval, void *extra) { /* check_canonical_path already canonicalized newval for us */ + char *dname; char *tname; char *fname; - - tname = guc_malloc(ERROR, strlen(newval) + 12); /* /pgstat.tmp */ - sprintf(tname, "%s/pgstat.tmp", newval); - fname = guc_malloc(ERROR, strlen(newval) + 13); /* /pgstat.stat */ - sprintf(fname, "%s/pgstat.stat", newval); - + char *tname_db; + char *fname_db; + + /* directory */ + dname = guc_malloc(ERROR, strlen(newval) + 1); /* runtime dir */ + sprintf(dname, "%s", newval); + + /* global stats */ + tname = guc_malloc(ERROR, strlen(newval) + 12); /* /global.tmp */ + sprintf(tname, "%s/global.tmp", newval); + fname = guc_malloc(ERROR, strlen(newval) + 13); /* /global.stat */ + sprintf(fname, "%s/global.stat", newval); + + /* per-db stats */ + tname_db = guc_malloc(ERROR, strlen(newval) + 8); /* /%d.tmp */ + sprintf(tname_db, "%s/%%d.tmp", newval); + fname_db = guc_malloc(ERROR, strlen(newval) + 9); /* /%d.stat */ + sprintf(fname_db, "%s/%%d.stat", newval); + + if (pgstat_stat_directory) + free(pgstat_stat_directory); + pgstat_stat_directory = dname; if (pgstat_stat_tmpname) free(pgstat_stat_tmpname); pgstat_stat_tmpname = tname; if (pgstat_stat_filename) free(pgstat_stat_filename); pgstat_stat_filename = fname; + if (pgstat_stat_db_tmpname) + free(pgstat_stat_db_tmpname); + pgstat_stat_db_tmpname = tname_db; + if (pgstat_stat_db_filename) + free(pgstat_stat_db_filename); + pgstat_stat_db_filename = fname_db; } static bool diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 3e05ac3..a8a2639 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -179,6 +179,7 @@ char *restrict_env; #endif const char *subdirs[] = { "global", + "stat", "pg_xlog", "pg_xlog/archive_status", "pg_clog", diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 613c1c2..b3467d2 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -205,6 +205,7 @@ typedef struct PgStat_MsgInquiry PgStat_MsgHdr m_hdr; TimestampTz clock_time; /* observed local clock time */ TimestampTz cutoff_time; /* minimum acceptable file timestamp */ + Oid databaseid; /* requested DB (InvalidOid => all DBs) */ } PgStat_MsgInquiry; @@ -514,7 +515,7 @@ typedef union PgStat_Msg * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9A +#define PGSTAT_FILE_FORMAT_ID 0xA240CA47 /* ---------- * PgStat_StatDBEntry The collector's data per database @@ -545,6 +546,7 @@ typedef struct PgStat_StatDBEntry PgStat_Counter n_block_write_time; TimestampTz stat_reset_timestamp; + TimestampTz stats_timestamp; /* time of db stats file update */ /* * tables and functions must be last in the struct, because we don't write @@ -722,8 +724,11 @@ extern bool pgstat_track_activities; extern bool pgstat_track_counts; extern int pgstat_track_functions; extern PGDLLIMPORT int pgstat_track_activity_query_size; +extern char *pgstat_stat_directory; extern char *pgstat_stat_tmpname; extern char *pgstat_stat_filename; +extern char *pgstat_stat_db_tmpname; +extern char *pgstat_stat_db_filename; /* * BgWriter statistics counters are updated directly by bgwriter and bufmgr