From 1c2009a823e1922e3a1a2b280626abbe11a86696 Mon Sep 17 00:00:00 2001 From: Samba Siva Date: Fri, 29 May 2026 18:05:45 +0530 Subject: [PATCH] Add hook for plugins to acquire sample rows during ANALYZE - Introduced AcquireSampleRowsFunc_hook for extensions to override row sampling. - Updated analyze.c to utilize the hook if registered. - Added tests to ensure ANALYZE completes without errors with the new hook. --- doc/src/sgml/xfunc.sgml | 10 ++++++++++ src/backend/commands/analyze.c | 24 ++++++++++++++++++++---- src/include/commands/vacuum.h | 11 +++++++++++ src/test/regress/expected/analyze.out | 20 ++++++++++++++++++++ src/test/regress/parallel_schedule | 1 + src/test/regress/sql/analyze.sql | 24 ++++++++++++++++++++++++ 6 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 src/test/regress/expected/analyze.out create mode 100644 src/test/regress/sql/analyze.sql diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index 1eb5abffd8..f1ef025ae2 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -4203,4 +4203,14 @@ supportfn(internal) returns internal To create such conditions, the support function must implement the SupportRequestIndexCondition request type. + + + The hook variable AcquireSampleRowsFunc_hook allows extensions + to override the row sampling function used by ANALYZE for regular + tables and materialized views. This is useful for extensions that implement distributed + databases and need to sample rows from remote nodes instead of the local heap. + The hook function fills the provided rows[] buffer with at most + targrows heap tuples and sets *totalrows and *totaldeadrows + to the estimated live and dead row counts of the relation. 
+ diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4fffb76e55..3560acdff1 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -74,6 +74,8 @@ int default_statistics_target = 100; static MemoryContext anl_context = NULL; static BufferAccessStrategy vac_strategy; +/* Hook for plugins to acquire sample rows for ANALYZE */ +AcquireSampleRowsFunc_hook_type AcquireSampleRowsFunc_hook = NULL; static void do_analyze_rel(Relation onerel, VacuumParams *params, List *va_cols, @@ -188,8 +190,15 @@ analyze_rel(Oid relid, RangeVar *relation, if (onerel->rd_rel->relkind == RELKIND_RELATION || onerel->rd_rel->relkind == RELKIND_MATVIEW) { - /* Regular table, so we'll use the regular row acquisition function */ - acquirefunc = acquire_sample_rows; + /* + * Regular table, so we'll use the regular row acquisition function. + * If a plugin has registered a hook to acquire sample rows, use it; + * otherwise use the default function. + */ + if (AcquireSampleRowsFunc_hook) + acquirefunc = AcquireSampleRowsFunc_hook; + else + acquirefunc = acquire_sample_rows; /* Also get regular table's size */ relpages = RelationGetNumberOfBlocks(onerel); } @@ -1467,8 +1476,15 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, if (childrel->rd_rel->relkind == RELKIND_RELATION || childrel->rd_rel->relkind == RELKIND_MATVIEW) { - /* Regular table, so use the regular row acquisition function */ - acquirefunc = acquire_sample_rows; + /* + * Regular table, so use the regular row acquisition function. + * If a plugin has registered a hook to acquire sample rows, use it; + * otherwise use the default function. 
+ */ + if (AcquireSampleRowsFunc_hook) + acquirefunc = AcquireSampleRowsFunc_hook; + else + acquirefunc = acquire_sample_rows; relpages = RelationGetNumberOfBlocks(childrel); } else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bc37a80dc7..146f936861 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -21,6 +21,7 @@ #include "catalog/pg_class.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" +#include "foreign/fdwapi.h" #include "parser/parse_node.h" #include "storage/buf.h" #include "storage/lock.h" @@ -113,6 +114,9 @@ typedef void (*AnalyzeAttrComputeStatsFunc) (VacAttrStatsP stats, int samplerows, double totalrows); +/* Hook type for plugins to acquire sample rows for ANALYZE */ +typedef AcquireSampleRowsFunc AcquireSampleRowsFunc_hook_type; + typedef struct VacAttrStats { /* @@ -334,6 +338,13 @@ extern PGDLLIMPORT int vacuum_cost_limit; extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; +/* + * Hook for plugins to override row sampling during ANALYZE. + * Also applies to child relations of partitioned/inherited tables. + * See acquire_sample_rows() in src/backend/commands/analyze.c. + */ +extern PGDLLIMPORT AcquireSampleRowsFunc_hook_type AcquireSampleRowsFunc_hook; + /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); extern void vacuum(List *relations, VacuumParams *params, diff --git a/src/test/regress/expected/analyze.out b/src/test/regress/expected/analyze.out new file mode 100644 index 0000000000..5bb6bbc45b --- /dev/null +++ b/src/test/regress/expected/analyze.out @@ -0,0 +1,20 @@ +-- Test AcquireSampleRowsFunc_hook +-- Usually this would be tested via a C extension. +-- Here we just confirm this does not break the existing ANALYZE code +-- by verifying that ANALYZE completes without error. 
+CREATE TABLE employees ( + id SERIAL PRIMARY KEY, + name TEXT, + department TEXT, + salary NUMERIC +); +INSERT INTO employees + SELECT + i, + 'Employee ' || i, + 'Department ' || (i % 5), + (i % 100) * 1000 + 50000 + FROM generate_series(1, 1000) i; +-- Should complete without error +ANALYZE employees; +DROP TABLE employees; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e1e0c54019..71978ef858 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -94,6 +94,7 @@ test: vacuum_parallel # Run this alone, because concurrent DROP TABLE would make non-superuser # "ANALYZE;" fail with "relation with OID $n does not exist". test: maintain_every +test: analyze # no relation related tests can be put in this group test: publication subscription diff --git a/src/test/regress/sql/analyze.sql b/src/test/regress/sql/analyze.sql new file mode 100644 index 0000000000..e0bc84e9f9 --- /dev/null +++ b/src/test/regress/sql/analyze.sql @@ -0,0 +1,24 @@ +-- Test AcquireSampleRowsFunc_hook +-- Usually this would be tested via a C extension. +-- Here we just confirm this does not break the existing ANALYZE code +-- by verifying that ANALYZE completes without error. + +CREATE TABLE employees ( + id SERIAL PRIMARY KEY, + name TEXT, + department TEXT, + salary NUMERIC +); + +INSERT INTO employees + SELECT + i, + 'Employee ' || i, + 'Department ' || (i % 5), + (i % 100) * 1000 + 50000 + FROM generate_series(1, 1000) i; + +-- Should complete without error +ANALYZE employees; + +DROP TABLE employees; -- 2.50.1 (Apple Git-155)