From: | Andrew Bille <andrewbille(at)gmail(dot)com> |
---|---|
To: | andrewbille(at)gmail(dot)com, pgsql-bugs(at)lists(dot)postgresql(dot)org |
Subject: | Re: BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables. |
Date: | 2025-10-14 08:15:36 |
Message-ID: | CAJnzarxwswnHPC8aLopWOEj3vpr-fgzhOwgzSznvCewF8ZvfGg@mail.gmail.com |
Views: | Whole Thread | Raw Message | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-bugs |
Patches for 17- and 18+
Regards, Andrew
On Tue, Oct 14, 2025 at 3:14 PM PG Bug reporting form <
noreply(at)postgresql(dot)org> wrote:
> The following bug has been logged on the website:
>
> Bug reference: 19086
> Logged by: Andrew Bille
> Email address: andrewbille(at)gmail(dot)com
> PostgreSQL version: 18.0
> Operating system: Debian 12
> Description:
>
> In case of system indexes corruption the collecting of index definitions
> can
> take a really long time.
>
> Synthetic example:
>
> DO $$
> DECLARE
> i INTEGER;
> j INTEGER;
> BEGIN
> FOR i IN 1..15000 LOOP
> EXECUTE 'CREATE TABLE tab' i ' as SELECT 1 as f';
> FOR j IN 1..5 LOOP
> EXECUTE 'CREATE index idx_tab' i '_' j ' ON tab' i
> '(f)';
> END LOOP;
> END LOOP;
> END;
> $$;
>
> If
> ignore_system_indexes = on
>
> time pg_dump --data-only test > test.sql
>
> real 62m44,582s
> user 0m0,576s
> sys 0m0,259s
>
> of which
> LOG: duration: 3474423.683 ms statement: SELECT t.tableoid, t.oid,
> i.indrelid, t.relname AS indexname, t.relpages, t.reltuples,
> t.relallvisible, 0 AS relallfrozen,
> pg_catalog.pg_get_indexdef(i.indexrelid)
> AS indexdef, i.indkey, i.indisclustered, c.contype, c.conname,
> c.condeferrable, c.condeferred, c.tableoid AS contableoid, c.oid AS conoid,
> pg_catalog.pg_get_constraintdef(c.oid, false) AS condef, CASE WHEN
> i.indexprs IS NOT NULL THEN (SELECT pg_catalog.array_agg(attname ORDER BY
> attnum) FROM pg_catalog.pg_attribute WHERE attrelid = i.indexrelid) ELSE
> NULL END AS indattnames, (SELECT spcname FROM pg_catalog.pg_tablespace s
> WHERE s.oid = t.reltablespace) AS tablespace, t.reloptions AS
> indreloptions,
> i.indisreplident, inh.inhparent AS parentidx, i.indnkeyatts AS indnkeyatts,
> i.indnatts AS indnatts, (SELECT pg_catalog.array_agg(attnum ORDER BY
> attnum)
> FROM pg_catalog.pg_attribute WHERE attrelid = i.indexrelid AND
> attstattarget >= 0) AS indstatcols, (SELECT
> pg_catalog.array_agg(attstattarget ORDER BY attnum) FROM
> pg_catalog.pg_attribute WHERE attrelid = i.indexrelid AND
> attstattarget >= 0) AS indstatvals, i.indnullsnotdistinct, NULL AS
> conperiod
> FROM
>
> unnest('{16385,16393,16401,16409,16417,16425,16433,16441,16449,16457,16465,16473,16481,16489,16497,16505,16513,16521,16529,16537,16545,16553,16561,16569,16577,16585,16593,16601,16609,16617,16625,16633,16641,16649,16657,16665,16673,16681,16689,16697,16705,16713,16721,16729,16737,16745,16753,16761,16769,16777,16785,16793,16801,16809,16817,16825,16833,16841,16849,16857,16865,16873,16881,16889,16897,16905,16913,16921,16929,16937,16945,16953,16961,16969,16977,16985,16993,17001,17009,17017,17025,17033,17041,17049,17057,17065,17073,17081,17089,17097,17105,17113,17121,17129,17137,17145,17153,17161,17169,17177,17185,17193,17201,17209,17217
> ... 36361,136369,136377}'::pg_catalog.oid[]) AS src(tbloid)
> JOIN pg_catalog.pg_index i ON (src.tbloid = i.indrelid) JOIN
> pg_catalog.pg_class t ON (t.oid = i.indexrelid) JOIN pg_catalog.pg_class t2
> ON (t2.oid = i.indrelid) LEFT JOIN pg_catalog.pg_constraint c ON
> (i.indrelid
> = c.conrelid AND i.indexrelid = c.conindid AND c.contype IN ('p','u','x'))
> LEFT JOIN pg_catalog.pg_inherits inh ON (inh.inhrelid = indexrelid) WHERE
> (i.indisvalid OR t2.relkind = 'p') AND i.indisready ORDER BY i.indrelid,
> indexname
>
> With a simple patch (passes tests)
>
> diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
> index a1976fae607..471e62da735 100644
> --- a/src/bin/pg_dump/common.c
> +++ b/src/bin/pg_dump/common.c
> @@ -95,7 +95,7 @@ static IndxInfo *findIndexByOid(Oid oid);
> * Collect information about all potentially dumpable objects
> */
> TableInfo *
> -getSchemaData(Archive *fout, int *numTablesPtr)
> +getSchemaData(Archive *fout, int *numTablesPtr, bool dataOnly)
> {
> TableInfo *tblinfo;
> ExtensionInfo *extinfo;
> @@ -211,11 +211,14 @@ getSchemaData(Archive *fout, int *numTablesPtr)
> pg_log_info("reading partitioning data");
> getPartitioningInfo(fout);
>
> - pg_log_info("reading indexes");
> - getIndexes(fout, tblinfo, numTables);
> + if (!dataOnly)
> + {
> + pg_log_info("reading indexes");
> + getIndexes(fout, tblinfo, numTables);
>
> - pg_log_info("flagging indexes in partitioned tables");
> - flagInhIndexes(fout, tblinfo, numTables);
> + pg_log_info("flagging indexes in partitioned tables");
> + flagInhIndexes(fout, tblinfo, numTables);
> + }
>
> pg_log_info("reading extended statistics");
> getExtendedStatistics(fout);
> diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
> index 641bece12c7..ef8bd786371 100644
> --- a/src/bin/pg_dump/pg_dump.c
> +++ b/src/bin/pg_dump/pg_dump.c
> @@ -1090,7 +1090,7 @@ main(int argc, char **argv)
> * Now scan the database and create DumpableObject structs for all
> the
> * objects we intend to dump.
> */
> - tblinfo = getSchemaData(fout, &numTables);
> + tblinfo = getSchemaData(fout, &numTables, data_only);
>
> if (dopt.dumpData)
> {
> diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
> index fa6d1a510f7..83e097404a3 100644
> --- a/src/bin/pg_dump/pg_dump.h
> +++ b/src/bin/pg_dump/pg_dump.h
> @@ -750,7 +750,7 @@ typedef struct _SubRelInfo
> * common utility functions
> */
>
> -extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr);
> +extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr, bool
> dataOnly);
>
> extern void AssignDumpId(DumpableObject *dobj);
> extern void recordAdditionalCatalogID(CatalogId catId, DumpableObject
> *dobj);
>
>
> we have:
>
> time pg_dump --data-only test > test.sql
>
> real 7m45,533s
> user 0m0,726s
> sys 0m0,634s
>
> In the real case of system indexes corruption ... dump can take enourmous
> amount of time.
>
>
Attachment | Content-Type | Size |
---|---|---|
pg_dump_data_only_no_index_master.patch | text/x-patch | 2.0 KB |
pg_dump_data_only_no_index_13-17.patch | text/x-patch | 2.1 KB |
From | Date | Subject | |
---|---|---|---|
Next Message | Heikki Linnakangas | 2025-10-14 11:39:26 | Re: BUG #19080: CancelRequest message documentation bug |
Previous Message | PG Bug reporting form | 2025-10-14 08:13:52 | BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables. |