From b5533e3cd5529ddcf6c1c6bc18312e50b58ac49b Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sun, 15 Mar 2026 21:44:58 -0700 Subject: [PATCH v1 1/4] instrumentation: Use Instrumentation struct for parallel workers This simplifies the DSM allocations a bit since we don't need to separately allocate WAL and buffer usage, and allows the easier future addition of a third stack-based struct being discussed. In passing, adjust InstrAccumParallelQuery to handle multiple workers: All callers currently have a loop to call the accumulation for each worker and a local loop variable defined. To slightly simplify the callers, move the loop into the InstrAccumParallelQuery function and add a new "nworkers" argument to it. Author: Lukas Fittl Reviewed-by: Discussion: --- src/backend/access/brin/brin.c | 46 ++++++------------- src/backend/access/gin/gininsert.c | 46 ++++++------------- src/backend/access/nbtree/nbtsort.c | 46 ++++++------------- src/backend/commands/vacuumparallel.c | 53 ++++++++------------- src/backend/executor/execParallel.c | 66 ++++++++++++--------------- src/backend/executor/instrument.c | 27 +++++++---- src/include/executor/execParallel.h | 5 +- src/include/executor/instrument.h | 4 +- 8 files changed, 114 insertions(+), 179 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index bdb30752e09..b04028a3858 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -51,8 +51,7 @@ #define PARALLEL_KEY_BRIN_SHARED UINT64CONST(0xB000000000000001) #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005) +#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xB000000000000004) /* * Status for index builds performed in parallel. This is allocated in a @@ -148,8 +147,7 @@ typedef struct BrinLeader BrinShared *brinshared; Sharedsort *sharedsort; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; } BrinLeader; /* @@ -2387,8 +2385,7 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, BrinShared *brinshared; Sharedsort *sharedsort; BrinLeader *brinleader = palloc0_object(BrinLeader); - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; bool leaderparticipates = true; int querylen; @@ -2430,18 +2427,14 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, shm_toc_estimate_keys(&pcxt->estimator, 2); /* - * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE - * and PARALLEL_KEY_BUFFER_USAGE. + * Estimate space for Instrumentation -- PARALLEL_KEY_INSTRUMENTATION. * * If there are no extensions loaded that care, we could skip this. We * have no way of knowing whether anyone's looking at pgWalUsage or * pgBufferUsage, so do it unconditionally. */ shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); + mul_size(sizeof(Instrumentation), pcxt->nworkers)); shm_toc_estimate_keys(&pcxt->estimator, 1); /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ @@ -2514,15 +2507,12 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, } /* - * Allocate space for each worker's WalUsage and BufferUsage; no need to + * Allocate space for each worker's Instrumentation; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + instr = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(Instrumentation), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, instr); /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -2533,8 +2523,7 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, brinleader->brinshared = brinshared; brinleader->sharedsort = sharedsort; brinleader->snapshot = snapshot; - brinleader->walusage = walusage; - brinleader->bufferusage = bufferusage; + brinleader->instr = instr; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -2563,8 +2552,6 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) { - int i; - /* Shutdown worker processes */ WaitForParallelWorkersToFinish(brinleader->pcxt); @@ -2572,8 +2559,8 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) * Next, accumulate WAL usage. (This must wait for the workers to finish, * or we might get incomplete data.) */ - for (i = 0; i < brinleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&brinleader->bufferusage[i], &brinleader->walusage[i]); + InstrAccumParallelQuery(brinleader->instr, + brinleader->pcxt->nworkers_launched); /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(brinleader->snapshot)) @@ -2887,8 +2874,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *worker_instr; int sortmem; /* @@ -2949,10 +2935,8 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc) heapRel, indexRel, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + worker_instr = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, false); + InstrEndParallelQuery(&worker_instr[ParallelWorkerNumber]); index_close(indexRel, indexLockmode); table_close(heapRel, heapLockmode); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index cb9ed3b563c..c6d144d12f5 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -45,8 +45,7 @@ #define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001) #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005) +#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xB000000000000004) /* * Status for index builds performed in parallel. This is allocated in a @@ -138,8 +137,7 @@ typedef struct GinLeader GinBuildShared *ginshared; Sharedsort *sharedsort; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; } GinLeader; typedef struct @@ -945,8 +943,7 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, GinBuildShared *ginshared; Sharedsort *sharedsort; GinLeader *ginleader = palloc0_object(GinLeader); - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; bool leaderparticipates = true; int querylen; @@ -987,18 +984,14 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, shm_toc_estimate_keys(&pcxt->estimator, 2); /* - * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE - * and PARALLEL_KEY_BUFFER_USAGE. + * Estimate space for Instrumentation -- PARALLEL_KEY_INSTRUMENTATION. * * If there are no extensions loaded that care, we could skip this. We * have no way of knowing whether anyone's looking at pgWalUsage or * pgBufferUsage, so do it unconditionally. */ shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); + mul_size(sizeof(Instrumentation), pcxt->nworkers)); shm_toc_estimate_keys(&pcxt->estimator, 1); /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ @@ -1066,15 +1059,12 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, } /* - * Allocate space for each worker's WalUsage and BufferUsage; no need to + * Allocate space for each worker's Instrumentation; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + instr = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(Instrumentation), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, instr); /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -1085,8 +1075,7 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, ginleader->ginshared = ginshared; ginleader->sharedsort = sharedsort; ginleader->snapshot = snapshot; - ginleader->walusage = walusage; - ginleader->bufferusage = bufferusage; + ginleader->instr = instr; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -1115,8 +1104,6 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state) { - int i; - /* Shutdown worker processes */ WaitForParallelWorkersToFinish(ginleader->pcxt); @@ -1124,8 +1111,8 @@ _gin_end_parallel(GinLeader *ginleader, GinBuildState *state) * Next, accumulate WAL usage. (This must wait for the workers to finish, * or we might get incomplete data.) */ - for (i = 0; i < ginleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]); + InstrAccumParallelQuery(ginleader->instr, + ginleader->pcxt->nworkers_launched); /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(ginleader->snapshot)) @@ -2118,8 +2105,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *worker_instr; int sortmem; /* @@ -2199,10 +2185,8 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) heapRel, indexRel, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + worker_instr = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, false); + InstrEndParallelQuery(&worker_instr[ParallelWorkerNumber]); index_close(indexRel, indexLockmode); table_close(heapRel, heapLockmode); diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 756dfa3dcf4..0e5fa86cf17 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -66,8 +66,7 @@ #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) #define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xA000000000000005) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000006) +#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xA000000000000005) /* * DISABLE_LEADER_PARTICIPATION disables the leader's participation in @@ -195,8 +194,7 @@ typedef struct BTLeader Sharedsort *sharedsort; Sharedsort *sharedsort2; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; } BTLeader; /* @@ -1408,8 +1406,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) Sharedsort *sharedsort2; BTSpool *btspool = buildstate->spool; BTLeader *btleader = palloc0_object(BTLeader); - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *instr; bool leaderparticipates = true; int querylen; @@ -1462,18 +1459,14 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) } /* - * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE - * and PARALLEL_KEY_BUFFER_USAGE. + * Estimate space for Instrumentation -- PARALLEL_KEY_INSTRUMENTATION. * * If there are no extensions loaded that care, we could skip this. We * have no way of knowing whether anyone's looking at pgWalUsage or * pgBufferUsage, so do it unconditionally. */ shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); + mul_size(sizeof(Instrumentation), pcxt->nworkers)); shm_toc_estimate_keys(&pcxt->estimator, 1); /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ @@ -1560,15 +1553,12 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) } /* - * Allocate space for each worker's WalUsage and BufferUsage; no need to + * Allocate space for each worker's Instrumentation; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + instr = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(Instrumentation), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, instr); /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -1580,8 +1570,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btleader->sharedsort = sharedsort; btleader->sharedsort2 = sharedsort2; btleader->snapshot = snapshot; - btleader->walusage = walusage; - btleader->bufferusage = bufferusage; + btleader->instr = instr; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -1610,8 +1599,6 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) static void _bt_end_parallel(BTLeader *btleader) { - int i; - /* Shutdown worker processes */ WaitForParallelWorkersToFinish(btleader->pcxt); @@ -1619,8 +1606,8 @@ _bt_end_parallel(BTLeader *btleader) * Next, accumulate WAL usage. (This must wait for the workers to finish, * or we might get incomplete data.) */ - for (i = 0; i < btleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&btleader->bufferusage[i], &btleader->walusage[i]); + InstrAccumParallelQuery(btleader->instr, + btleader->pcxt->nworkers_launched); /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(btleader->snapshot)) @@ -1753,8 +1740,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + Instrumentation *worker_instr; int sortmem; #ifdef BTREE_BUILD_STATS @@ -1836,10 +1822,8 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) sharedsort2, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + worker_instr = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, false); + InstrEndParallelQuery(&worker_instr[ParallelWorkerNumber]); #ifdef BTREE_BUILD_STATS if (log_btree_build_stats) diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 41cefcfde54..b2cdab310d6 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -56,9 +56,8 @@ */ #define PARALLEL_VACUUM_KEY_SHARED 1 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 2 -#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 3 -#define PARALLEL_VACUUM_KEY_WAL_USAGE 4 -#define PARALLEL_VACUUM_KEY_INDEX_STATS 5 +#define PARALLEL_VACUUM_KEY_INSTRUMENTATION 3 +#define PARALLEL_VACUUM_KEY_INDEX_STATS 4 /* * Struct for cost-based vacuum delay related parameters to share among an @@ -236,11 +235,8 @@ struct ParallelVacuumState /* Shared dead items space among parallel vacuum workers */ TidStore *dead_items; - /* Points to buffer usage area in DSM */ - BufferUsage *buffer_usage; - - /* Points to WAL usage area in DSM */ - WalUsage *wal_usage; + /* Points to instrumentation area in DSM */ + Instrumentation *instr; /* * False if the index is totally unsuitable target for all parallel @@ -311,8 +307,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, PVShared *shared; TidStore *dead_items; PVIndStats *indstats; - BufferUsage *buffer_usage; - WalUsage *wal_usage; + Instrumentation *instr; bool *will_parallel_vacuum; Size est_indstats_len; Size est_shared_len; @@ -365,18 +360,15 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, shm_toc_estimate_keys(&pcxt->estimator, 1); /* - * Estimate space for BufferUsage and WalUsage -- - * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE. + * Estimate space for Instrumentation -- + * PARALLEL_VACUUM_KEY_INSTRUMENTATION. * * If there are no extensions loaded that care, we could skip this. We * have no way of knowing whether anyone's looking at pgBufferUsage or * pgWalUsage, so do it unconditionally. */ shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); + mul_size(sizeof(Instrumentation), pcxt->nworkers)); shm_toc_estimate_keys(&pcxt->estimator, 1); /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */ @@ -474,17 +466,13 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, pvs->shared = shared; /* - * Allocate space for each worker's BufferUsage and WalUsage; no need to - * initialize + * Allocate space for each worker's Instrumentation; no need to + * initialize. */ - buffer_usage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage); - pvs->buffer_usage = buffer_usage; - wal_usage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage); - pvs->wal_usage = wal_usage; + instr = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(Instrumentation), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INSTRUMENTATION, instr); + pvs->instr = instr; /* Store query string for workers */ if (debug_query_string) @@ -944,8 +932,8 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan /* Wait for all vacuum workers to finish */ WaitForParallelWorkersToFinish(pvs->pcxt); - for (int i = 0; i < pvs->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]); + InstrAccumParallelQuery(pvs->instr, + pvs->pcxt->nworkers_launched); } /* @@ -1202,8 +1190,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) PVIndStats *indstats; PVShared *shared; TidStore *dead_items; - BufferUsage *buffer_usage; - WalUsage *wal_usage; + Instrumentation *worker_instr; int nindexes; char *sharedquery; ErrorContextCallback errcallback; @@ -1311,10 +1298,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) parallel_vacuum_process_safe_indexes(&pvs); /* Report buffer/WAL usage during parallel execution */ - buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false); - wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], - &wal_usage[ParallelWorkerNumber]); + worker_instr = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_INSTRUMENTATION, false); + InstrEndParallelQuery(&worker_instr[ParallelWorkerNumber]); /* Report any remaining cost-based vacuum delay time */ if (track_cost_delay_timing) diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 81b87d82fab..89e717a1c50 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -60,13 +60,12 @@ #define PARALLEL_KEY_EXECUTOR_FIXED UINT64CONST(0xE000000000000001) #define PARALLEL_KEY_PLANNEDSTMT UINT64CONST(0xE000000000000002) #define PARALLEL_KEY_PARAMLISTINFO UINT64CONST(0xE000000000000003) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xE000000000000004) +#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000004) #define PARALLEL_KEY_TUPLE_QUEUE UINT64CONST(0xE000000000000005) -#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000006) +#define PARALLEL_KEY_NODE_INSTRUMENTATION UINT64CONST(0xE000000000000006) #define PARALLEL_KEY_DSA UINT64CONST(0xE000000000000007) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008) #define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xE00000000000000A) #define PARALLEL_TUPLE_QUEUE_SIZE 65536 @@ -662,8 +661,6 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, char *pstmt_data; char *pstmt_space; char *paramlistinfo_space; - BufferUsage *bufusage_space; - WalUsage *walusage_space; SharedExecutorInstrumentation *instrumentation = NULL; SharedJitInstrumentation *jit_instrumentation = NULL; int pstmt_len; @@ -727,21 +724,14 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_estimate_keys(&pcxt->estimator, 1); /* - * Estimate space for BufferUsage. + * Estimate space for Instrumentation. * * If EXPLAIN is not in use and there are no extensions loaded that care, * we could skip this. But we have no way of knowing whether anyone's * looking at pgBufferUsage, so do it unconditionally. */ shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - - /* - * Same thing for WalUsage. - */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); + mul_size(sizeof(Instrumentation), pcxt->nworkers)); shm_toc_estimate_keys(&pcxt->estimator, 1); /* Estimate space for tuple queues. */ @@ -827,17 +817,18 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARAMLISTINFO, paramlistinfo_space); SerializeParamList(estate->es_param_list_info, ¶mlistinfo_space); - /* Allocate space for each worker's BufferUsage; no need to initialize. */ - bufusage_space = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufusage_space); - pei->buffer_usage = bufusage_space; + /* + * Allocate space for each worker's Instrumentation; no need to + * initialize. + */ + { + Instrumentation *instr; - /* Same for WalUsage. */ - walusage_space = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage_space); - pei->wal_usage = walusage_space; + instr = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(Instrumentation), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, instr); + pei->instrumentation = instr; + } /* Set up the tuple queues that the workers will write into. */ pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false); @@ -863,9 +854,9 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, instrument = GetInstrumentationArray(instrumentation); for (i = 0; i < nworkers * e.nnodes; ++i) InstrInitNode(&instrument[i], estate->es_instrument, false); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTRUMENTATION, + shm_toc_insert(pcxt->toc, PARALLEL_KEY_NODE_INSTRUMENTATION, instrumentation); - pei->instrumentation = instrumentation; + pei->node_instrumentation = instrumentation; if (estate->es_jit_flags != PGJIT_NONE) { @@ -1258,8 +1249,7 @@ ExecParallelFinish(ParallelExecutorInfo *pei) * Next, accumulate buffer/WAL usage. (This must wait for the workers to * finish, or we might get incomplete data.) */ - for (i = 0; i < nworkers; i++) - InstrAccumParallelQuery(&pei->buffer_usage[i], &pei->wal_usage[i]); + InstrAccumParallelQuery(pei->instrumentation, nworkers); pei->finished = true; } @@ -1273,10 +1263,10 @@ ExecParallelFinish(ParallelExecutorInfo *pei) void ExecParallelCleanup(ParallelExecutorInfo *pei) { - /* Accumulate instrumentation, if any. */ - if (pei->instrumentation) + /* Accumulate node instrumentation, if any. */ + if (pei->node_instrumentation) ExecParallelRetrieveInstrumentation(pei->planstate, - pei->instrumentation); + pei->node_instrumentation); /* Accumulate JIT instrumentation, if any. */ if (pei->jit_instrumentation) @@ -1514,8 +1504,6 @@ void ParallelQueryMain(dsm_segment *seg, shm_toc *toc) { FixedParallelExecutorState *fpes; - BufferUsage *buffer_usage; - WalUsage *wal_usage; DestReceiver *receiver; QueryDesc *queryDesc; SharedExecutorInstrumentation *instrumentation; @@ -1530,7 +1518,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) /* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */ receiver = ExecParallelGetReceiver(seg, toc); - instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, true); + instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_NODE_INSTRUMENTATION, true); if (instrumentation != NULL) instrument_options = instrumentation->instrument_options; jit_instrumentation = shm_toc_lookup(toc, PARALLEL_KEY_JIT_INSTRUMENTATION, @@ -1588,10 +1576,12 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) ExecutorFinish(queryDesc); /* Report buffer/WAL usage during parallel execution. */ - buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - wal_usage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], - &wal_usage[ParallelWorkerNumber]); + { + Instrumentation *worker_instr; + + worker_instr = shm_toc_lookup(toc, PARALLEL_KEY_INSTRUMENTATION, false); + InstrEndParallelQuery(&worker_instr[ParallelWorkerNumber]); + } /* Report instrumentation data if any instrumentation options are set. */ if (instrumentation != NULL) diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index ffbcd572133..20431e64fb4 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -284,20 +284,29 @@ InstrStartParallelQuery(void) /* report usage after parallel executor shutdown */ void -InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage) +InstrEndParallelQuery(Instrumentation *instr) { - memset(bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(bufusage, &pgBufferUsage, &save_pgBufferUsage); - memset(walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(walusage, &pgWalUsage, &save_pgWalUsage); + memset(&instr->bufusage, 0, sizeof(BufferUsage)); + BufferUsageAccumDiff(&instr->bufusage, &pgBufferUsage, &save_pgBufferUsage); + memset(&instr->walusage, 0, sizeof(WalUsage)); + WalUsageAccumDiff(&instr->walusage, &pgWalUsage, &save_pgWalUsage); } -/* accumulate work done by workers in leader's stats */ +/* + * Accumulate work done by parallel workers in the leader's stats. + * + * instr points to the per-worker Instrumentation array the leader allocated in + * DSM; each of the nworkers launched workers reported into its own slot via + * InstrEndParallelQuery. Must be called only after the workers have finished. + */ void -InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage) +InstrAccumParallelQuery(Instrumentation *instr, int nworkers) { - BufferUsageAdd(&pgBufferUsage, bufusage); - WalUsageAdd(&pgWalUsage, walusage); + for (int i = 0; i < nworkers; i++) + { + BufferUsageAdd(&pgBufferUsage, &instr[i].bufusage); + WalUsageAdd(&pgWalUsage, &instr[i].walusage); + } } /* dst += add */ diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h index 5a2034811d5..6c8b602d07f 100644 --- a/src/include/executor/execParallel.h +++ b/src/include/executor/execParallel.h @@ -25,9 +25,8 @@ typedef struct ParallelExecutorInfo { PlanState *planstate; /* plan subtree we're running in parallel */ ParallelContext *pcxt; /* parallel context we're using */ - BufferUsage *buffer_usage; /* points to bufusage area in DSM */ - WalUsage *wal_usage; /* walusage area in DSM */ - SharedExecutorInstrumentation *instrumentation; /* optional */ + Instrumentation *instrumentation; /* instrumentation area in DSM */ + SharedExecutorInstrumentation *node_instrumentation; /* optional */ struct SharedJitInstrumentation *jit_instrumentation; /* optional */ dsa_area *area; /* points to DSA area in DSM */ dsa_pointer param_exec; /* serialized PARAM_EXEC parameters */ diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index f093a52aae0..8fab0e17fd0 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -148,8 +148,8 @@ extern void InstrStartTrigger(TriggerInstrumentation *tginstr); extern void InstrStopTrigger(TriggerInstrumentation *tginstr, int64 firings); extern void InstrStartParallelQuery(void); -extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage); -extern void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage); +extern void InstrEndParallelQuery(Instrumentation *instr); +extern void InstrAccumParallelQuery(Instrumentation *instr, int nworkers); extern void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub); extern void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, -- 2.47.1