From 232bc8a73711c743e74d964abfb202563e176aa5 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 10 Oct 2025 12:28:37 -0500 Subject: [PATCH v4 1/1] autovacuum scheduling improvements --- src/backend/postmaster/autovacuum.c | 142 +++++++++++++++++++++++++--- src/tools/pgindent/typedefs.list | 1 + 2 files changed, 128 insertions(+), 15 deletions(-) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 5084af7dfb6..e8cd414c445 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -97,6 +97,7 @@ #include "storage/procsignal.h" #include "storage/smgr.h" #include "tcop/tcopprot.h" +#include "utils/float.h" #include "utils/fmgroids.h" #include "utils/fmgrprotos.h" #include "utils/guc_hooks.h" @@ -310,6 +311,12 @@ static AutoVacuumShmemStruct *AutoVacuumShmem; static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList); static MemoryContext DatabaseListCxt = NULL; +typedef struct +{ + Oid oid; + double score; +} TableToProcess; + /* * Dummy pointer to persuade Valgrind that we've not leaked the array of * avl_dbase structs. Make it global to ensure the compiler doesn't @@ -351,7 +358,8 @@ static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, int effective_multixact_freeze_max_age, - bool *dovacuum, bool *doanalyze, bool *wraparound); + bool *dovacuum, bool *doanalyze, bool *wraparound, + double *score); static void autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy); @@ -1889,6 +1897,15 @@ get_database_list(void) return dblist; } +static int +TableToProcessComparator(const ListCell *a, const ListCell *b) +{ + TableToProcess *t1 = (TableToProcess *) lfirst(a); + TableToProcess *t2 = (TableToProcess *) lfirst(b); + + return float8_cmp_internal(t2->score, t1->score); +} + /* * Process a database table-by-table * @@ -1902,7 +1919,7 @@ do_autovacuum(void) HeapTuple tuple; TableScanDesc relScan; Form_pg_database dbForm; - List *table_oids = NIL; + List *tables_to_process = NIL; List *orphan_oids = NIL; HASHCTL ctl; HTAB *table_toast_map; @@ -2014,6 +2031,7 @@ do_autovacuum(void) bool dovacuum; bool doanalyze; bool wraparound; + double score = 0.0; if (classForm->relkind != RELKIND_RELATION && classForm->relkind != RELKIND_MATVIEW) @@ -2054,11 +2072,19 @@ do_autovacuum(void) /* Check if it needs vacuum or analyze */ relation_needs_vacanalyze(relid, relopts, classForm, tabentry, effective_multixact_freeze_max_age, - &dovacuum, &doanalyze, &wraparound); + &dovacuum, &doanalyze, &wraparound, + &score); - /* Relations that need work are added to table_oids */ + /* Relations that need work are added to tables_to_process */ if (dovacuum || doanalyze) - table_oids = lappend_oid(table_oids, relid); + { + TableToProcess *table = palloc(sizeof(TableToProcess)); + + table->oid = relid; + table->score = score; + + tables_to_process = lappend(tables_to_process, table); + } /* * Remember TOAST associations for the second pass. Note: we must do @@ -2114,6 +2140,7 @@ do_autovacuum(void) bool dovacuum; bool doanalyze; bool wraparound; + double score = 0.0; /* * We cannot safely process other backends' temp tables, so skip 'em. @@ -2146,11 +2173,19 @@ do_autovacuum(void) relation_needs_vacanalyze(relid, relopts, classForm, tabentry, effective_multixact_freeze_max_age, - &dovacuum, &doanalyze, &wraparound); + &dovacuum, &doanalyze, &wraparound, + &score); /* ignore analyze for toast tables */ if (dovacuum) - table_oids = lappend_oid(table_oids, relid); + { + TableToProcess *table = palloc(sizeof(TableToProcess)); + + table->oid = relid; + table->score = score; + + tables_to_process = lappend(tables_to_process, table); + } /* Release stuff to avoid leakage */ if (free_relopts) @@ -2274,6 +2309,8 @@ do_autovacuum(void) MemoryContextSwitchTo(AutovacMemCxt); } + list_sort(tables_to_process, TableToProcessComparator); + /* * Optionally, create a buffer access strategy object for VACUUM to use. * We use the same BufferAccessStrategy object for all tables VACUUMed by @@ -2302,9 +2339,9 @@ do_autovacuum(void) /* * Perform operations on collected tables. */ - foreach(cell, table_oids) + foreach_ptr(TableToProcess, table, tables_to_process) { - Oid relid = lfirst_oid(cell); + Oid relid = table->oid; HeapTuple classTup; autovac_table *tab; bool isshared; @@ -2535,7 +2572,7 @@ deleted: pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance); } - list_free(table_oids); + list_free_deep(tables_to_process); /* * Perform additional work items, as requested by backends. @@ -2934,6 +2971,7 @@ recheck_relation_needs_vacanalyze(Oid relid, bool *wraparound) { PgStat_StatTabEntry *tabentry; + double score; /* fetch the pgstat table entry */ tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared, @@ -2941,7 +2979,8 @@ recheck_relation_needs_vacanalyze(Oid relid, relation_needs_vacanalyze(relid, avopts, classForm, tabentry, effective_multixact_freeze_max_age, - dovacuum, doanalyze, wraparound); + dovacuum, doanalyze, wraparound, + &score); /* Release tabentry to avoid leakage */ if (tabentry) @@ -2990,6 +3029,32 @@ recheck_relation_needs_vacanalyze(Oid relid, * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor * value < 0 is substituted with the value of * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze. + * + * This function also returns a score that can be used to sort the list of + * tables to process. The idea is to have autovacuum prioritize tables that + * are furthest beyond their thresholds (e.g., a table nearing transaction ID + * wraparound should be vacuumed first). This prioritization scheme is + * certainly far from perfect; there are simply too many possibilities for any + * scoring technique to work across all workloads, and the situation might + * change significantly between the time we calculate the score and the time + * that autovacuum gets to processing it. However, we have attempted to + * develop something that is expected to work for a large portion of workloads + * with reasonable parameter settings. + * + * The score is calculated as the maximum of the ratios of each of the table's + * relevant values to its threshold. For example, if the number of inserted + * tuples is 100, and the insert threshold for the table is 80, the insert + * score is 1.25. If all other scores are below that value, the returned score + * will be 1.25. The other criteria considered for the score are the table + * ages (both relfrozenxid and relminmxid) compared to the corresponding + * freeze-max-age setting, the number of updated/deleted tuples compared to the + * vacuum threshold, and the number of inserted/updated/deleted tuples compared + * to the analyze threshold. + * + * One exception to the previous paragraph is for tables nearing wraparound, + * i.e., those that have surpassed the effective failsafe ages. In that case, + * the relfrozen/relminmxid-based score is scaled aggressively so that the + * table has a decent chance of sorting to the top of the list. */ static void relation_needs_vacanalyze(Oid relid, @@ -3000,7 +3065,8 @@ relation_needs_vacanalyze(Oid relid, /* output params below */ bool *dovacuum, bool *doanalyze, - bool *wraparound) + bool *wraparound, + double *score) { bool force_vacuum; bool av_enabled; @@ -3029,7 +3095,14 @@ relation_needs_vacanalyze(Oid relid, int multixact_freeze_max_age; TransactionId xidForceLimit; TransactionId relfrozenxid; + TransactionId relminmxid; MultiXactId multiForceLimit; + uint32 xid_age; + uint32 mxid_age; + double xid_score; + double mxid_score; + int effective_xid_failsafe_age; + int effective_mxid_failsafe_age; Assert(classForm != NULL); Assert(OidIsValid(relid)); @@ -3081,17 +3154,17 @@ relation_needs_vacanalyze(Oid relid, av_enabled = (relopts ? relopts->enabled : true); + relfrozenxid = classForm->relfrozenxid; + relminmxid = classForm->relminmxid; + /* Force vacuum if table is at risk of wraparound */ xidForceLimit = recentXid - freeze_max_age; if (xidForceLimit < FirstNormalTransactionId) xidForceLimit -= FirstNormalTransactionId; - relfrozenxid = classForm->relfrozenxid; force_vacuum = (TransactionIdIsNormal(relfrozenxid) && TransactionIdPrecedes(relfrozenxid, xidForceLimit)); if (!force_vacuum) { - MultiXactId relminmxid = classForm->relminmxid; - multiForceLimit = recentMulti - multixact_freeze_max_age; if (multiForceLimit < FirstMultiXactId) multiForceLimit -= FirstMultiXactId; @@ -3100,6 +3173,33 @@ relation_needs_vacanalyze(Oid relid, } *wraparound = force_vacuum; + /* + * To calculate the (M)XID age portion of the score, divide the age by its + * respective *_freeze_max_age parameter. + */ + xid_age = TransactionIdIsNormal(relfrozenxid) ? relfrozenxid - recentXid : 0; + mxid_age = MultiXactIdIsValid(relminmxid) ? relminmxid - recentMulti : 0; + + xid_score = (double) xid_age / freeze_max_age; + mxid_score = (double) mxid_age / multixact_freeze_max_age; + + /* + * To ensure tables are given increased priority once they begin + * approaching wraparound, we scale the score aggressively if the ages + * surpass vacuum_failsafe_age or vacuum_multixact_failsafe_age. + */ + effective_xid_failsafe_age = Max(vacuum_failsafe_age, + autovacuum_freeze_max_age * 1.05); + effective_mxid_failsafe_age = Max(vacuum_multixact_failsafe_age, + autovacuum_multixact_freeze_max_age * 1.05); + + if (xid_age >= effective_xid_failsafe_age) + xid_score = pow(xid_score, Max(1.0, (double) xid_age / 100000000)); + if (mxid_age >= effective_mxid_failsafe_age) + mxid_score = pow(mxid_score, Max(1.0, (double) mxid_age / 100000000)); + + *score = Max(xid_score, mxid_score); + /* User disabled it in pg_class.reloptions? (But ignore if at risk) */ if (!av_enabled && !force_vacuum) { @@ -3173,6 +3273,18 @@ relation_needs_vacanalyze(Oid relid, *dovacuum = force_vacuum || (vactuples > vacthresh) || (vac_ins_base_thresh >= 0 && instuples > vacinsthresh); *doanalyze = (anltuples > anlthresh); + + /* + * Update the score, being careful to avoid division by zero and to + * skip cases where auto-analyze does not apply, i.e., pg_statistic + * and TOAST tables. + */ + *score = Max(*score, (double) vactuples / Max(vacthresh, 1)); + if (relid != StatisticRelationId && + classForm->relkind != RELKIND_TOASTVALUE) + *score = Max(*score, (double) anltuples / Max(anlthresh, 1)); + if (vac_ins_base_thresh >= 0) + *score = Max(*score, (double) instuples / Max(vacinsthresh, 1)); } else { diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 43fe3bcd593..3e0739c7808 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3008,6 +3008,7 @@ TableScanDesc TableScanDescData TableSpaceCacheEntry TableSpaceOpts +TableToProcess TablespaceList TablespaceListCell TapeBlockTrailer -- 2.39.5 (Apple Git-154)