From caa4ab307a2daaf6cc495aaea78d34dbd66faa2f Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Thu, 17 Mar 2022 13:06:30 +0000 Subject: [PATCH v444] Add checkpoint stats of snapshot and mapping files of pg_logical dir At times, there can be many snapshot and mapping files under pg_logical dir that the checkpoint might have to delete/fsync based on the cutoff LSN which can increase the checkpoint time. Add stats related to these files to better understand the delays or time spent by the checkpointer processing them. Add these new log message only when necessary i.e. at least one snapshot or mapping files is processed during the checkpoint. --- src/backend/access/heap/rewriteheap.c | 30 +++++++++++++++++++++ src/backend/replication/logical/snapbuild.c | 25 +++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 2a53826736..471ad4c5e8 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -1196,6 +1196,13 @@ CheckPointLogicalRewriteHeap(void) DIR *mappings_dir; struct dirent *mapping_de; char path[MAXPGPATH + 20]; + uint64 files_rmvd_cnt = 0; + uint64 files_syncd_cnt = 0; + TimestampTz start_t; + TimestampTz end_t; + + if (log_checkpoints) + start_t = GetCurrentTimestamp(); /* * We start of with a minimum of the last redo pointer. No new decoding @@ -1247,6 +1254,9 @@ CheckPointLogicalRewriteHeap(void) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", path))); + + if (log_checkpoints) + files_rmvd_cnt++; } else { @@ -1280,9 +1290,29 @@ CheckPointLogicalRewriteHeap(void) (errcode_for_file_access(), errmsg("could not close file \"%s\": %m", path))); } + + if (log_checkpoints) + files_syncd_cnt++; } FreeDir(mappings_dir); /* persist directory entries to disk */ fsync_fname("pg_logical/mappings", true); + + /* let's emit these stats only when necessary */ + if (log_checkpoints && + (files_rmvd_cnt > 0 || files_syncd_cnt > 0)) + { + long t_msecs; + + end_t = GetCurrentTimestamp(); + t_msecs = TimestampDifferenceMilliseconds(start_t, end_t); + + ereport(LOG, + (errmsg("logical decoding rewrite mapping file(s) removed=" UINT64_FORMAT ", synced=" UINT64_FORMAT ", time=%ld.%03d s, cutoff LSN=%X/%X", + files_rmvd_cnt, + files_syncd_cnt, + t_msecs / 1000, (int) (t_msecs % 1000), + LSN_FORMAT_ARGS(cutoff)))); + } } diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 83fca8a77d..e6556ca1b6 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1926,6 +1926,12 @@ CheckPointSnapBuild(void) DIR *snap_dir; struct dirent *snap_de; char path[MAXPGPATH + 21]; + uint64 files_rmvd_cnt = 0; + TimestampTz start_t; + TimestampTz end_t; + + if (log_checkpoints) + start_t = GetCurrentTimestamp(); /* * We start off with a minimum of the last redo pointer. No new @@ -1997,7 +2003,26 @@ CheckPointSnapBuild(void) path))); continue; } + + if (log_checkpoints) + files_rmvd_cnt++; } } FreeDir(snap_dir); + + /* let's emit these stats only when necessary */ + if (log_checkpoints && + files_rmvd_cnt > 0) + { + long t_msecs; + + end_t = GetCurrentTimestamp(); + t_msecs = TimestampDifferenceMilliseconds(start_t, end_t); + + ereport(LOG, + (errmsg("logical decoding snapshot file(s) removed=" UINT64_FORMAT ", time=%ld.%03d s, cutoff LSN=%X/%X", + files_rmvd_cnt, + t_msecs / 1000, (int) (t_msecs % 1000), + LSN_FORMAT_ARGS(cutoff)))); + } } -- 2.25.1