From 6a2705d2084abd9b6b304c460cf17a4c96c1fdad Mon Sep 17 00:00:00 2001 From: Satya Narlapuram Date: Sun, 3 May 2026 17:37:24 +0000 Subject: [PATCH] Fix ANALYZE crash on extended stats with virtual generated columns When a table has extended statistics involving expressions on virtual generated columns, ANALYZE can fail if evaluating those expressions raises an error (e.g. division by zero). This happens because the generated column expression is evaluated directly during statistics computation. Fix by wrapping expression evaluation in PG_TRY/PG_CATCH blocks: - In make_build_data() and compute_expr_stats(), add PG_TRY blocks that clean up executor resources (slot, estate) before re-throwing, preventing resource leaks on error. - In BuildRelationExtStatistics(), catch errors from expression evaluation, issue a WARNING with the statistics object name and error details, then continue processing the remaining statistics objects. This ensures ANALYZE completes successfully even when some extended statistics objects cannot be computed due to expression evaluation errors. --- src/backend/statistics/extended_stats.c | 238 +++++++++++++++--------- src/test/regress/expected/stats_ext.out | 29 +++ src/test/regress/sql/stats_ext.sql | 20 ++ 3 files changed, 195 insertions(+), 92 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2b83355d..88cea81b 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -194,41 +194,73 @@ BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows, if (stattarget == 0) continue; - /* evaluate expressions (if the statistics object has any) */ - data = make_build_data(onerel, stat, numrows, rows, stats, stattarget); - - /* compute statistic of each requested type */ - foreach(lc2, stat->types) + /* + * Wrap expression evaluation and stats computation in PG_TRY so + * that errors from evaluating expressions (e.g. division by zero + * in virtual generated columns) don't cause ANALYZE to fail + * entirely. Skip the statistics object and issue a WARNING + * instead. + */ + PG_TRY(); { - char t = (char) lfirst_int(lc2); - - if (t == STATS_EXT_NDISTINCT) - ndistinct = statext_ndistinct_build(totalrows, data); - else if (t == STATS_EXT_DEPENDENCIES) - dependencies = statext_dependencies_build(data); - else if (t == STATS_EXT_MCV) - mcv = statext_mcv_build(data, totalrows, stattarget); - else if (t == STATS_EXT_EXPRESSIONS) + /* evaluate expressions (if the statistics has any) */ + data = make_build_data(onerel, stat, numrows, rows, + stats, stattarget); + + /* compute statistic of each requested type */ + foreach(lc2, stat->types) { - AnlExprData *exprdata; - int nexprs; + char t = (char) lfirst_int(lc2); + + if (t == STATS_EXT_NDISTINCT) + ndistinct = statext_ndistinct_build(totalrows, data); + else if (t == STATS_EXT_DEPENDENCIES) + dependencies = statext_dependencies_build(data); + else if (t == STATS_EXT_MCV) + mcv = statext_mcv_build(data, totalrows, stattarget); + else if (t == STATS_EXT_EXPRESSIONS) + { + AnlExprData *exprdata; + int nexprs; - /* should not happen, thanks to checks when defining stats */ - if (!stat->exprs) - elog(ERROR, "requested expression stats, but there are no expressions"); + /* should not happen, thanks to checks when defining stats */ + if (!stat->exprs) + elog(ERROR, "requested expression stats, but there are no expressions"); - exprdata = build_expr_data(stat->exprs, stattarget); - nexprs = list_length(stat->exprs); + exprdata = build_expr_data(stat->exprs, stattarget); + nexprs = list_length(stat->exprs); - compute_expr_stats(onerel, exprdata, nexprs, rows, numrows); + compute_expr_stats(onerel, exprdata, nexprs, + rows, numrows); - exprstats = serialize_expr_stats(exprdata, nexprs); + exprstats = serialize_expr_stats(exprdata, nexprs); + } } - } - /* store the statistics in the catalog */ - statext_store(stat->statOid, inh, - ndistinct, dependencies, mcv, exprstats, stats); + /* store the statistics in the catalog */ + statext_store(stat->statOid, inh, + ndistinct, dependencies, mcv, exprstats, stats); + } + PG_CATCH(); + { + ErrorData *edata; + + /* Save the error, issue a WARNING and continue */ + MemoryContextSwitchTo(cxt); + edata = CopyErrorData(); + FlushErrorState(); + + ereport(WARNING, + (errcode(ERRCODE_WARNING), + errmsg("skipping statistics object \"%s.%s\" for relation \"%s.%s\"", + stat->schema, stat->name, + get_namespace_name(onerel->rd_rel->relnamespace), + RelationGetRelationName(onerel)), + errdetail("%s", edata->message))); + + FreeErrorData(edata); + } + PG_END_TRY(); /* for reporting progress */ pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED, @@ -2185,46 +2217,58 @@ compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs, exprnulls = (bool *) palloc(numrows * sizeof(bool)); tcnt = 0; - for (i = 0; i < numrows; i++) + PG_TRY(); { - Datum datum; - bool isnull; + for (i = 0; i < numrows; i++) + { + Datum datum; + bool isnull; - /* - * Reset the per-tuple context each time, to reclaim any cruft - * left behind by evaluating the statistics expressions. - */ - ResetExprContext(econtext); + /* + * Reset the per-tuple context each time, to reclaim any + * cruft left behind by evaluating the statistics + * expressions. + */ + ResetExprContext(econtext); - /* Set up for expression evaluation */ - ExecStoreHeapTuple(rows[i], slot, false); + /* Set up for expression evaluation */ + ExecStoreHeapTuple(rows[i], slot, false); - /* - * Evaluate the expression. We do this in the per-tuple context so - * as not to leak memory, and then copy the result into the - * context created at the beginning of this function. - */ - datum = ExecEvalExprSwitchContext(exprstate, - GetPerTupleExprContext(estate), - &isnull); - if (isnull) - { - exprvals[tcnt] = (Datum) 0; - exprnulls[tcnt] = true; - } - else - { - /* Make sure we copy the data into the context. */ - Assert(CurrentMemoryContext == expr_context); + /* + * Evaluate the expression. We do this in the per-tuple + * context so as not to leak memory, and then copy the + * result into the context created at the beginning of + * this function. + */ + datum = ExecEvalExprSwitchContext(exprstate, + GetPerTupleExprContext(estate), + &isnull); + if (isnull) + { + exprvals[tcnt] = (Datum) 0; + exprnulls[tcnt] = true; + } + else + { + /* Make sure we copy the data into the context. */ + Assert(CurrentMemoryContext == expr_context); - exprvals[tcnt] = datumCopy(datum, - stats->attrtype->typbyval, - stats->attrtype->typlen); - exprnulls[tcnt] = false; - } + exprvals[tcnt] = datumCopy(datum, + stats->attrtype->typbyval, + stats->attrtype->typlen); + exprnulls[tcnt] = false; + } - tcnt++; + tcnt++; + } + } + PG_CATCH(); + { + ExecDropSingleTupleTableSlot(slot); + FreeExecutorState(estate); + PG_RE_THROW(); } + PG_END_TRY(); /* * Now we can compute the statistics for the expression columns. @@ -2624,46 +2668,56 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, /* Set up expression evaluation state */ exprstates = ExecPrepareExprList(stat->exprs, estate); - for (i = 0; i < numrows; i++) + PG_TRY(); { - /* - * Reset the per-tuple context each time, to reclaim any cruft left - * behind by evaluating the statistics object expressions. - */ - ResetExprContext(econtext); - - /* Set up for expression evaluation */ - ExecStoreHeapTuple(rows[i], slot, false); - - idx = bms_num_members(stat->columns); - foreach(lc, exprstates) + for (i = 0; i < numrows; i++) { - Datum datum; - bool isnull; - ExprState *exprstate = (ExprState *) lfirst(lc); - /* - * XXX This probably leaks memory. Maybe we should use - * ExecEvalExprSwitchContext but then we need to copy the result - * somewhere else. + * Reset the per-tuple context each time, to reclaim any cruft + * left behind by evaluating the statistics object expressions. */ - datum = ExecEvalExpr(exprstate, - GetPerTupleExprContext(estate), - &isnull); - if (isnull) - { - result->values[idx][i] = (Datum) 0; - result->nulls[idx][i] = true; - } - else + ResetExprContext(econtext); + + /* Set up for expression evaluation */ + ExecStoreHeapTuple(rows[i], slot, false); + + idx = bms_num_members(stat->columns); + foreach(lc, exprstates) { - result->values[idx][i] = datum; - result->nulls[idx][i] = false; - } + Datum datum; + bool isnull; + ExprState *exprstate = (ExprState *) lfirst(lc); - idx++; + /* + * XXX This probably leaks memory. Maybe we should use + * ExecEvalExprSwitchContext but then we need to copy the + * result somewhere else. + */ + datum = ExecEvalExpr(exprstate, + GetPerTupleExprContext(estate), + &isnull); + if (isnull) + { + result->values[idx][i] = (Datum) 0; + result->nulls[idx][i] = true; + } + else + { + result->values[idx][i] = datum; + result->nulls[idx][i] = false; + } + + idx++; + } } } + PG_CATCH(); + { + ExecDropSingleTupleTableSlot(slot); + FreeExecutorState(estate); + PG_RE_THROW(); + } + PG_END_TRY(); ExecDropSingleTupleTableSlot(slot); FreeExecutorState(estate); diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 37070c1a..43892f4f 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3207,6 +3207,14 @@ SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE w = 0' (1 row) DROP TABLE virtual_gen_stats; +-- extended statistics on virtual generated columns whose expressions can error +CREATE TABLE virtual_gen_err (a int, b int GENERATED ALWAYS AS (a / 0) VIRTUAL); +INSERT INTO virtual_gen_err VALUES (1), (2), (3); +CREATE STATISTICS virtual_gen_err_s ON a, b FROM virtual_gen_err; +ANALYZE virtual_gen_err; -- should warn, not fail +WARNING: skipping statistics object "public.virtual_gen_err_s" for relation "public.virtual_gen_err" +DETAIL: division by zero +DROP TABLE virtual_gen_err; -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in -- the underlying table. @@ -3709,3 +3717,24 @@ SELECT range_length_histogram, range_empty_frac, range_bounds_histogram (1 row) DROP TABLE stats_ext_tbl_range; +-- Expression statistics with errors (not involving virtual generated columns). +-- Errors during expression evaluation should produce a WARNING, not a failure, +-- and non-erroring statistics objects should still be computed. +CREATE TABLE expr_err (a int); +INSERT INTO expr_err VALUES (1), (2), (3); +CREATE STATISTICS expr_err_s1 ON ((a/0)) FROM expr_err; +CREATE STATISTICS expr_err_s2 ON (a/0),(a+1) FROM expr_err; +CREATE STATISTICS expr_err_s3 ON ((a+1)) FROM expr_err; +ANALYZE expr_err; -- should warn, not fail +WARNING: skipping statistics object "public.expr_err_s1" for relation "public.expr_err" +DETAIL: division by zero +WARNING: skipping statistics object "public.expr_err_s2" for relation "public.expr_err" +DETAIL: division by zero +SELECT statistics_name from pg_stats_ext x + WHERE tablename = 'expr_err' ORDER BY ROW(x.*); + statistics_name +----------------- + expr_err_s3 +(1 row) + +DROP TABLE expr_err; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3cc6012b..f78fd902 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1608,6 +1608,13 @@ SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE w = 0' DROP TABLE virtual_gen_stats; +-- extended statistics on virtual generated columns whose expressions can error +CREATE TABLE virtual_gen_err (a int, b int GENERATED ALWAYS AS (a / 0) VIRTUAL); +INSERT INTO virtual_gen_err VALUES (1), (2), (3); +CREATE STATISTICS virtual_gen_err_s ON a, b FROM virtual_gen_err; +ANALYZE virtual_gen_err; -- should warn, not fail +DROP TABLE virtual_gen_err; + -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in -- the underlying table. @@ -1908,3 +1915,16 @@ SELECT range_length_histogram, range_empty_frac, range_bounds_histogram FROM pg_stats_ext_exprs WHERE statistics_name = 'stats_ext_range'; DROP TABLE stats_ext_tbl_range; + +-- Expression statistics with errors (not involving virtual generated columns). +-- Errors during expression evaluation should produce a WARNING, not a failure, +-- and non-erroring statistics objects should still be computed. +CREATE TABLE expr_err (a int); +INSERT INTO expr_err VALUES (1), (2), (3); +CREATE STATISTICS expr_err_s1 ON ((a/0)) FROM expr_err; +CREATE STATISTICS expr_err_s2 ON (a/0),(a+1) FROM expr_err; +CREATE STATISTICS expr_err_s3 ON ((a+1)) FROM expr_err; +ANALYZE expr_err; -- should warn, not fail +SELECT statistics_name from pg_stats_ext x + WHERE tablename = 'expr_err' ORDER BY ROW(x.*); +DROP TABLE expr_err; -- 2.43.0