From dc675031bd6132d60ee3dfe3860bfdc2def901b4 Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Tue, 2 Jun 2026 20:29:31 +0000 Subject: [PATCH 1/2] vacuumlo: Find OID references inside domains, composites, and arrays. Previously, vacuumlo only recognized columns whose type was directly named 'oid' or 'lo'. Columns using domains over oid, composite types containing oid fields, or arrays of these were silently ignored, causing referenced large objects to be incorrectly removed. Use a recursive CTE to discover all paths to oid through domains, composites, and arrays. For servers older than v14, retain the original logic since those are EOL. While at it, add a TAP test covering orphan removal, which was previously untested. Discussion: https://postgr.es/m/CALsgZNAM=AYK-9ZLR7Z0YLx6Lyx5aSrjjse3T+FmsJ=jTvfhDQ@mail.gmail.com --- contrib/vacuumlo/meson.build | 1 + contrib/vacuumlo/t/002_orphan_remove.pl | 109 ++++++++++++++++++++++++ contrib/vacuumlo/vacuumlo.c | 104 +++++++++++++--------- doc/src/sgml/vacuumlo.sgml | 7 +- 4 files changed, 179 insertions(+), 42 deletions(-) create mode 100644 contrib/vacuumlo/t/002_orphan_remove.pl diff --git a/contrib/vacuumlo/meson.build b/contrib/vacuumlo/meson.build index 4ee5b048575..dac0b49272f 100644 --- a/contrib/vacuumlo/meson.build +++ b/contrib/vacuumlo/meson.build @@ -24,6 +24,7 @@ tests += { 'tap': { 'tests': [ 't/001_basic.pl', + 't/002_orphan_remove.pl', ], }, } diff --git a/contrib/vacuumlo/t/002_orphan_remove.pl b/contrib/vacuumlo/t/002_orphan_remove.pl new file mode 100644 index 00000000000..def3d407a23 --- /dev/null +++ b/contrib/vacuumlo/t/002_orphan_remove.pl @@ -0,0 +1,109 @@ +# Copyright (c) 2021-2026, PostgreSQL Global Development Group + +# This tests that vacuumlo correctly removes orphaned large objects while +# preserving references stored in oid columns, lo columns, and complex types +# built over either. +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->start; + +my $dbname = 'vacuumlo_test'; +$node->safe_psql('postgres', "CREATE DATABASE $dbname"); + +# Install the lo extension (creates the lo type, recognized by name). +$node->safe_psql($dbname, "CREATE EXTENSION lo"); + +# Create composite types containing lo fields. +$node->safe_psql($dbname, q{ + CREATE TYPE comp_with_lo AS (label text, loid lo); + CREATE TYPE nested_comp AS (label text, inner_comp comp_with_lo); +}); + +# Create tables exercising various patterns. +$node->safe_psql($dbname, q{ + CREATE TABLE t_plain_oid (id serial, loid oid); + CREATE TABLE t_lo (id serial, loid lo); + CREATE TABLE t_comp (id serial, val comp_with_lo); + CREATE TABLE t_nested_comp (id serial, val nested_comp); + CREATE TABLE t_array_oid (id serial, loids oid[]); + CREATE TABLE t_array_lo (id serial, loids lo[]); + CREATE TABLE t_array_comp (id serial, vals comp_with_lo[]); +}); + +# Create large objects: lo1..lo9 will be referenced, lo10..lo14 are orphans. +my @lo_oids; +for my $i (1 .. 14) +{ + my $oid = $node->safe_psql($dbname, "SELECT lo_create(0)"); + push @lo_oids, $oid; +} + +# lo1: plain oid column +$node->safe_psql($dbname, + "INSERT INTO t_plain_oid (loid) VALUES ('$lo_oids[0]')"); + +# lo2: lo extension type +$node->safe_psql($dbname, + "INSERT INTO t_lo (loid) VALUES ('$lo_oids[1]')"); + +# lo3: composite type with lo field +$node->safe_psql($dbname, + "INSERT INTO t_comp (val) VALUES (ROW('a', '$lo_oids[2]')::comp_with_lo)"); + +# lo4: nested composite +$node->safe_psql($dbname, + "INSERT INTO t_nested_comp (val) VALUES (ROW('b', ROW('c', '$lo_oids[3]')::comp_with_lo)::nested_comp)"); + +# lo5, lo6: array of oid +$node->safe_psql($dbname, + "INSERT INTO t_array_oid (loids) VALUES (ARRAY['$lo_oids[4]', '$lo_oids[5]']::oid[])"); + +# lo7: array of lo +$node->safe_psql($dbname, + "INSERT INTO t_array_lo (loids) VALUES (ARRAY['$lo_oids[6]']::lo[])"); + +# lo8, lo9: array of composite with lo +$node->safe_psql($dbname, + "INSERT INTO t_array_comp (vals) VALUES (ARRAY[ROW('d', '$lo_oids[7]'), ROW('e', '$lo_oids[8]')]::comp_with_lo[])"); + +# Verify all 14 large objects exist before vacuumlo. +my $count_before = $node->safe_psql($dbname, + "SELECT count(*) FROM pg_largeobject_metadata"); +is($count_before, '14', 'all 14 large objects exist before vacuumlo'); + +# Run vacuumlo — assert success and check removal message. +command_like( + ['vacuumlo', '-v', $node->connstr($dbname)], + qr/Successfully removed 5 large objects/, + 'vacuumlo removes orphan large objects'); + +# lo10..lo14 (indices 9..13) should have been removed. +my $count_after = $node->safe_psql($dbname, + "SELECT count(*) FROM pg_largeobject_metadata"); +is($count_after, '9', 'only 9 referenced large objects remain after vacuumlo'); + +# Verify each referenced LO still exists. +for my $i (0 .. 8) +{ + my $exists = $node->safe_psql($dbname, + "SELECT count(*) FROM pg_largeobject_metadata WHERE oid = $lo_oids[$i]"); + is($exists, '1', "referenced lo (index $i, oid $lo_oids[$i]) still exists"); +} + +# Verify each orphan LO was removed. +for my $i (9 .. 13) +{ + my $exists = $node->safe_psql($dbname, + "SELECT count(*) FROM pg_largeobject_metadata WHERE oid = $lo_oids[$i]"); + is($exists, '0', "orphan lo (index $i, oid $lo_oids[$i]) was removed"); +} + +$node->stop; +done_testing(); diff --git a/contrib/vacuumlo/vacuumlo.c b/contrib/vacuumlo/vacuumlo.c index 8102569466b..f5131bf2bed 100644 --- a/contrib/vacuumlo/vacuumlo.c +++ b/contrib/vacuumlo/vacuumlo.c @@ -29,7 +29,7 @@ #include "libpq-fe.h" #include "pg_getopt.h" -#define BUFSIZE 1024 +#define BUFSIZE 4096 enum trivalue { @@ -182,24 +182,72 @@ vacuumlo(const char *database, const struct _param *param) PQclear(res); /* - * Now find any candidate tables that have columns of type oid. + * Now find any candidate tables that have columns of type oid (or domains + * over oid, or composite types containing oid fields, recursively). * * NOTE: we ignore system tables and temp tables by the expedient of * rejecting tables in schemas named 'pg_*'. In particular, the temp * table formed above is ignored, and pg_largeobject will be too. If * either of these were scanned, obviously we'd end up with nothing to * delete... + * + * The query returns (schema, table, expr) where expr is a SQL expression + * built server-side with quote_ident(). For example, expr may be: loid, + * (val).loid, ((val).inner_comp).loid, or unnest(loids). */ buf[0] = '\0'; - strcat(buf, "SELECT s.nspname, c.relname, a.attname "); - strcat(buf, "FROM pg_class c, pg_attribute a, pg_namespace s, pg_type t "); - strcat(buf, "WHERE a.attnum > 0 AND NOT a.attisdropped "); - strcat(buf, " AND a.attrelid = c.oid "); - strcat(buf, " AND a.atttypid = t.oid "); - strcat(buf, " AND c.relnamespace = s.oid "); - strcat(buf, " AND t.typname in ('oid', 'lo') "); - strcat(buf, " AND c.relkind in (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ")"); - strcat(buf, " AND s.nspname !~ '^pg_'"); + if (PQserverVersion(conn) >= 140000) + { + strcat(buf, "WITH RECURSIVE expressions(schema, tbl, expr, typid) AS (" + " SELECT quote_ident(s.nspname), quote_ident(c.relname), " + " quote_ident(a.attname), a.atttypid " + " FROM pg_class c " + " JOIN pg_attribute a ON a.attrelid = c.oid " + " AND a.attnum > 0 AND NOT a.attisdropped " + " JOIN pg_namespace s ON c.relnamespace = s.oid " + " WHERE c.relkind IN (" + CppAsString2(RELKIND_RELATION) ", " + CppAsString2(RELKIND_MATVIEW) ") " + " AND s.nspname !~ '^pg_' " + " UNION ALL " + " SELECT d.schema, d.tbl, x.expr, x.typid " + " FROM expressions d, " + " LATERAL (" + " SELECT d.expr, t.typbasetype AS typid " + " FROM pg_type t " + " WHERE t.oid = d.typid AND t.typtype = 'd' " + " UNION ALL " + " SELECT 'unnest(' || d.expr || ')', t.typelem AS typid " + " FROM pg_type t " + " WHERE t.oid = d.typid AND t.typelem != 0 " + " AND t.typlen = -1 " + " UNION ALL " + " SELECT '(' || d.expr || ').' || quote_ident(a.attname), " + " a.atttypid " + " FROM pg_type t " + " JOIN pg_attribute a ON a.attrelid = t.typrelid " + " AND a.attnum > 0 AND NOT a.attisdropped " + " WHERE t.oid = d.typid " + " AND t.typtype = 'c' AND t.typrelid != 0 " + " ) x" + ") " + "SELECT schema, tbl, expr " + "FROM expressions WHERE typid = 26 " + " OR typid IN (SELECT oid FROM pg_type" + " WHERE typname = 'lo')"); + } + else + { + strcat(buf, "SELECT quote_ident(s.nspname), quote_ident(c.relname), quote_ident(a.attname) "); + strcat(buf, "FROM pg_class c, pg_attribute a, pg_namespace s, pg_type t "); + strcat(buf, "WHERE a.attnum > 0 AND NOT a.attisdropped "); + strcat(buf, " AND a.attrelid = c.oid "); + strcat(buf, " AND a.atttypid = t.oid "); + strcat(buf, " AND c.relnamespace = s.oid "); + strcat(buf, " AND t.typname in ('oid', 'lo') "); + strcat(buf, " AND c.relkind in (" CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ")"); + strcat(buf, " AND s.nspname !~ '^pg_'"); + } res = PQexec(conn, buf); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -213,52 +261,31 @@ vacuumlo(const char *database, const struct _param *param) { char *schema, *table, - *field; + *expr; schema = PQgetvalue(res, i, 0); table = PQgetvalue(res, i, 1); - field = PQgetvalue(res, i, 2); + expr = PQgetvalue(res, i, 2); if (param->verbose) - fprintf(stdout, "Checking %s in %s.%s\n", field, schema, table); - - schema = PQescapeIdentifier(conn, schema, strlen(schema)); - table = PQescapeIdentifier(conn, table, strlen(table)); - field = PQescapeIdentifier(conn, field, strlen(field)); - - if (!schema || !table || !field) - { - pg_log_error("%s", PQerrorMessage(conn)); - PQclear(res); - PQfinish(conn); - PQfreemem(schema); - PQfreemem(table); - PQfreemem(field); - return -1; - } + fprintf(stdout, "Checking %s in %s.%s\n", expr, schema, table); snprintf(buf, BUFSIZE, "DELETE FROM vacuum_l " "WHERE lo IN (SELECT %s FROM %s.%s)", - field, schema, table); + expr, schema, table); + res2 = PQexec(conn, buf); if (PQresultStatus(res2) != PGRES_COMMAND_OK) { pg_log_error("failed to check %s in table %s.%s: %s", - field, schema, table, PQerrorMessage(conn)); + expr, schema, table, PQerrorMessage(conn)); PQclear(res2); PQclear(res); PQfinish(conn); - PQfreemem(schema); - PQfreemem(table); - PQfreemem(field); return -1; } PQclear(res2); - - PQfreemem(schema); - PQfreemem(table); - PQfreemem(field); } PQclear(res); @@ -542,3 +569,4 @@ main(int argc, char **argv) return rc; } + diff --git a/doc/src/sgml/vacuumlo.sgml b/doc/src/sgml/vacuumlo.sgml index 26b764d54b7..94b5c5fae14 100644 --- a/doc/src/sgml/vacuumlo.sgml +++ b/doc/src/sgml/vacuumlo.sgml @@ -213,10 +213,9 @@ First, vacuumlo builds a temporary table which contains all of the OIDs of the large objects in the selected database. It then scans through all columns in the database that are of type - oid or lo, and removes matching entries from the temporary - table. (Note: Only types with these names are considered; in particular, - domains over them are not considered.) The remaining entries in the - temporary table identify orphaned LOs. These are removed. + oid or lo, including domains, composite types, and arrays + containing these types. Matching entries are removed from the temporary table. + The remaining entries in the temporary table identify orphaned LOs. These are removed. -- 2.53.0