diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index be3adf1..226311c 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -253,7 +253,9 @@ static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create); static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create); static void pgstat_write_statsfile(bool permanent); -static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent); +static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent); +static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs); +static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent); static void backend_read_statsfile(void); static void pgstat_read_current_status(void); @@ -1408,13 +1410,14 @@ pgstat_ping(void) * ---------- */ static void -pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time) +pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid databaseid) { PgStat_MsgInquiry msg; pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY); msg.clock_time = clock_time; msg.cutoff_time = cutoff_time; + msg.databaseid = databaseid; pgstat_send(&msg, sizeof(msg)); } @@ -3063,7 +3066,7 @@ PgstatCollectorMain(int argc, char *argv[]) * zero. */ pgStatRunningInCollector = true; - pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); + pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false); /* * Loop to process messages until we get SIGQUIT or detect ungraceful @@ -3435,11 +3438,7 @@ static void pgstat_write_statsfile(bool permanent) { HASH_SEQ_STATUS hstat; - HASH_SEQ_STATUS tstat; - HASH_SEQ_STATUS fstat; PgStat_StatDBEntry *dbentry; - PgStat_StatTabEntry *tabentry; - PgStat_StatFuncEntry *funcentry; FILE *fpout; int32 format_id; const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname; @@ -3493,29 +3492,15 @@ pgstat_write_statsfile(bool permanent) (void) rc; /* we'll check for error with ferror */ /* - * Walk through the database's access stats per table. + * Write our the tables and functions into a separate file. */ - hash_seq_init(&tstat, dbentry->tables); - while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL) - { - fputc('T', fpout); - rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout); - (void) rc; /* we'll check for error with ferror */ - } - - /* - * Walk through the database's function stats table. - */ - hash_seq_init(&fstat, dbentry->functions); - while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL) - { - fputc('F', fpout); - rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout); - (void) rc; /* we'll check for error with ferror */ - } + pgstat_write_db_statsfile(dbentry, permanent); /* * Mark the end of this DB + * + * FIXME does it really make much sense, when the tables/functions + * are moved to a separate file (using those chars?) */ fputc('d', fpout); } @@ -3587,6 +3572,111 @@ pgstat_write_statsfile(bool permanent) } + +/* ---------- + * pgstat_write_statsfile() - + * + * Tell the news. + * If writing to the permanent file (happens when the collector is + * shutting down only), remove the temporary file so that backends + * starting up under a new postmaster can't read the old data before + * the new collector is ready. + * ---------- + */ +static void +pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent) +{ + HASH_SEQ_STATUS tstat; + HASH_SEQ_STATUS fstat; + PgStat_StatTabEntry *tabentry; + PgStat_StatFuncEntry *funcentry; + FILE *fpout; + int rc; + + /* FIXME Disgusting. Handle properly ... */ + const char *tmpfile_x = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : pgstat_stat_tmpname; + const char *statfile_x = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; + + char tmpfile[255]; + char statfile[255]; + + /* FIXME Do some kind of reduction (e.g. mod(oid,255)) not to end with thousands of files, + *one for each database */ + snprintf(tmpfile, 255, "%s.%d", tmpfile_x, dbentry->databaseid); + snprintf(statfile, 255, "%s.%d", statfile_x, dbentry->databaseid); + + /* + * Open the statistics temp file to write out the current values. + */ + fpout = AllocateFile(tmpfile, PG_BINARY_W); + if (fpout == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open temporary statistics file \"%s\": %m", + tmpfile))); + return; + } + + /* + * Walk through the database's access stats per table. + */ + hash_seq_init(&tstat, dbentry->tables); + while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL) + { + fputc('T', fpout); + rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + } + + /* + * Walk through the database's function stats table. + */ + hash_seq_init(&fstat, dbentry->functions); + while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) != NULL) + { + fputc('F', fpout); + rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + } + + /* + * No more output to be done. Close the temp file and replace the old + * pgstat.stat with it. The ferror() check replaces testing for error + * after each individual fputc or fwrite above. + */ + fputc('E', fpout); + + if (ferror(fpout)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write temporary statistics file \"%s\": %m", + tmpfile))); + FreeFile(fpout); + unlink(tmpfile); + } + else if (FreeFile(fpout) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close temporary statistics file \"%s\": %m", + tmpfile))); + unlink(tmpfile); + } + else if (rename(tmpfile, statfile) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m", + tmpfile, statfile))); + unlink(tmpfile); + } + + // if (permanent) + // unlink(pgstat_stat_filename); +} + /* ---------- * pgstat_read_statsfile() - * @@ -3595,14 +3685,10 @@ pgstat_write_statsfile(bool permanent) * ---------- */ static HTAB * -pgstat_read_statsfile(Oid onlydb, bool permanent) +pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs) { PgStat_StatDBEntry *dbentry; PgStat_StatDBEntry dbbuf; - PgStat_StatTabEntry *tabentry; - PgStat_StatTabEntry tabbuf; - PgStat_StatFuncEntry funcbuf; - PgStat_StatFuncEntry *funcentry; HASHCTL hash_ctl; HTAB *dbhash; HTAB *tabhash = NULL; @@ -3758,6 +3844,16 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) */ tabhash = dbentry->tables; funchash = dbentry->functions; + + /* + * Read the data from the file for this database. If there was + * onlydb specified (!= InvalidOid), we would not get here because + * of a break above. So we don't need to recheck. + */ + if (! onlydbs) + pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash, + permanent); + break; /* @@ -3768,6 +3864,79 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) funchash = NULL; break; + case 'E': + goto done; + + default: + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", + statfile))); + goto done; + } + } + +done: + FreeFile(fpin); + + if (permanent) + unlink(PGSTAT_STAT_PERMANENT_FILENAME); + + return dbhash; +} + + +/* ---------- + * pgstat_read_db_statsfile() - + * + * Reads in an existing statistics collector db file and initializes the + * tables and functions hash tables (for the database identified by Oid). + * ---------- + */ +static void +pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent) +{ + PgStat_StatTabEntry *tabentry; + PgStat_StatTabEntry tabbuf; + PgStat_StatFuncEntry funcbuf; + PgStat_StatFuncEntry *funcentry; + FILE *fpin; + bool found; + + /* FIXME Disgusting. Handle properly ... */ + const char *statfile_x = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; + char statfile[255]; + + /* FIXME Do some kind of reduction (e.g. mod(oid,255)) not to end with thousands of files, + *one for each database */ + snprintf(statfile, 255, "%s.%d", statfile_x, databaseid); + + /* + * Try to open the status file. If it doesn't exist, the backends simply + * return zero for anything and the collector simply starts from scratch + * with empty counters. + * + * ENOENT is a possibility if the stats collector is not running or has + * not yet written the stats file the first time. Any other failure + * condition is suspicious. + */ + if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) + { + if (errno != ENOENT) + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errcode_for_file_access(), + errmsg("could not open statistics file \"%s\": %m", + statfile))); + return; + } + + /* + * We found an existing collector stats file. Read it and put all the + * hashtable entries into place. + */ + for (;;) + { + switch (fgetc(fpin)) + { /* * 'T' A PgStat_StatTabEntry follows. */ @@ -3853,10 +4022,11 @@ pgstat_read_statsfile(Oid onlydb, bool permanent) done: FreeFile(fpin); - if (permanent) - unlink(PGSTAT_STAT_PERMANENT_FILENAME); +// FIXME unlink permanent filename (with the proper Oid appended +// if (permanent) +// unlink(PGSTAT_STAT_PERMANENT_FILENAME); - return dbhash; + return; } /* ---------- @@ -4006,7 +4176,7 @@ backend_read_statsfile(void) pfree(mytime); } - pgstat_send_inquiry(cur_ts, min_ts); + pgstat_send_inquiry(cur_ts, min_ts, InvalidOid); break; } @@ -4016,7 +4186,7 @@ backend_read_statsfile(void) /* Not there or too old, so kick the collector and wait a bit */ if ((count % PGSTAT_INQ_LOOP_COUNT) == 0) - pgstat_send_inquiry(cur_ts, min_ts); + pgstat_send_inquiry(cur_ts, min_ts, InvalidOid); pg_usleep(PGSTAT_RETRY_DELAY * 1000L); } @@ -4026,9 +4196,16 @@ backend_read_statsfile(void) /* Autovacuum launcher wants stats about all databases */ if (IsAutoVacuumLauncherProcess()) - pgStatDBHash = pgstat_read_statsfile(InvalidOid, false); + /* + * FIXME Does it really need info including tables/functions? Or is it enough to read + * database-level stats? It seems to me the launcher needs PgStat_StatDBEntry only + * (at least that's how I understand the rebuild_database_list() in autovacuum.c), + * because pgstat_stattabentries are used in do_autovacuum() only, that that's what's + * executed in workers ... So maybe we'd be just fine by reading in the dbentries? + */ + pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true); else - pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false); + pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, false); } diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 613c1c2..8971002 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -205,6 +205,7 @@ typedef struct PgStat_MsgInquiry PgStat_MsgHdr m_hdr; TimestampTz clock_time; /* observed local clock time */ TimestampTz cutoff_time; /* minimum acceptable file timestamp */ + Oid databaseid; /* requested DB (InvalidOid => all DBs) */ } PgStat_MsgInquiry;