From: | PG Bug reporting form <noreply(at)postgresql(dot)org> |
---|---|
To: | pgsql-bugs(at)lists(dot)postgresql(dot)org |
Cc: | andrewbille(at)gmail(dot)com |
Subject: | BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables. |
Date: | 2025-10-14 08:13:52 |
Message-ID: | 19086-871ff11017d03dc5@postgresql.org |
Views: | Whole Thread | Raw Message | Download mbox | Resend email |
Thread: | |
Lists: | pgsql-bugs |
The following bug has been logged on the website:
Bug reference: 19086
Logged by: Andrew Bille
Email address: andrewbille(at)gmail(dot)com
PostgreSQL version: 18.0
Operating system: Debian 12
Description:
In case of system indexes corruption the collecting of index definitions can
take a really long time.
Synthetic example:
DO $$
DECLARE
i INTEGER;
j INTEGER;
BEGIN
FOR i IN 1..15000 LOOP
EXECUTE 'CREATE TABLE tab' i ' as SELECT 1 as f';
FOR j IN 1..5 LOOP
EXECUTE 'CREATE index idx_tab' i '_' j ' ON tab' i '(f)';
END LOOP;
END LOOP;
END;
$$;
If
ignore_system_indexes = on
time pg_dump --data-only test > test.sql
real 62m44,582s
user 0m0,576s
sys 0m0,259s
of which
LOG: duration: 3474423.683 ms statement: SELECT t.tableoid, t.oid,
i.indrelid, t.relname AS indexname, t.relpages, t.reltuples,
t.relallvisible, 0 AS relallfrozen, pg_catalog.pg_get_indexdef(i.indexrelid)
AS indexdef, i.indkey, i.indisclustered, c.contype, c.conname,
c.condeferrable, c.condeferred, c.tableoid AS contableoid, c.oid AS conoid,
pg_catalog.pg_get_constraintdef(c.oid, false) AS condef, CASE WHEN
i.indexprs IS NOT NULL THEN (SELECT pg_catalog.array_agg(attname ORDER BY
attnum) FROM pg_catalog.pg_attribute WHERE attrelid = i.indexrelid) ELSE
NULL END AS indattnames, (SELECT spcname FROM pg_catalog.pg_tablespace s
WHERE s.oid = t.reltablespace) AS tablespace, t.reloptions AS indreloptions,
i.indisreplident, inh.inhparent AS parentidx, i.indnkeyatts AS indnkeyatts,
i.indnatts AS indnatts, (SELECT pg_catalog.array_agg(attnum ORDER BY attnum)
FROM pg_catalog.pg_attribute WHERE attrelid = i.indexrelid AND
attstattarget >= 0) AS indstatcols, (SELECT
pg_catalog.array_agg(attstattarget ORDER BY attnum) FROM
pg_catalog.pg_attribute WHERE attrelid = i.indexrelid AND
attstattarget >= 0) AS indstatvals, i.indnullsnotdistinct, NULL AS conperiod
FROM
unnest('{16385,16393,16401,16409,16417,16425,16433,16441,16449,16457,16465,16473,16481,16489,16497,16505,16513,16521,16529,16537,16545,16553,16561,16569,16577,16585,16593,16601,16609,16617,16625,16633,16641,16649,16657,16665,16673,16681,16689,16697,16705,16713,16721,16729,16737,16745,16753,16761,16769,16777,16785,16793,16801,16809,16817,16825,16833,16841,16849,16857,16865,16873,16881,16889,16897,16905,16913,16921,16929,16937,16945,16953,16961,16969,16977,16985,16993,17001,17009,17017,17025,17033,17041,17049,17057,17065,17073,17081,17089,17097,17105,17113,17121,17129,17137,17145,17153,17161,17169,17177,17185,17193,17201,17209,17217
... 36361,136369,136377}'::pg_catalog.oid[]) AS src(tbloid)
JOIN pg_catalog.pg_index i ON (src.tbloid = i.indrelid) JOIN
pg_catalog.pg_class t ON (t.oid = i.indexrelid) JOIN pg_catalog.pg_class t2
ON (t2.oid = i.indrelid) LEFT JOIN pg_catalog.pg_constraint c ON (i.indrelid
= c.conrelid AND i.indexrelid = c.conindid AND c.contype IN ('p','u','x'))
LEFT JOIN pg_catalog.pg_inherits inh ON (inh.inhrelid = indexrelid) WHERE
(i.indisvalid OR t2.relkind = 'p') AND i.indisready ORDER BY i.indrelid,
indexname
With a simple patch (passes tests)
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index a1976fae607..471e62da735 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -95,7 +95,7 @@ static IndxInfo *findIndexByOid(Oid oid);
* Collect information about all potentially dumpable objects
*/
TableInfo *
-getSchemaData(Archive *fout, int *numTablesPtr)
+getSchemaData(Archive *fout, int *numTablesPtr, bool dataOnly)
{
TableInfo *tblinfo;
ExtensionInfo *extinfo;
@@ -211,11 +211,14 @@ getSchemaData(Archive *fout, int *numTablesPtr)
pg_log_info("reading partitioning data");
getPartitioningInfo(fout);
- pg_log_info("reading indexes");
- getIndexes(fout, tblinfo, numTables);
+ if (!dataOnly)
+ {
+ pg_log_info("reading indexes");
+ getIndexes(fout, tblinfo, numTables);
- pg_log_info("flagging indexes in partitioned tables");
- flagInhIndexes(fout, tblinfo, numTables);
+ pg_log_info("flagging indexes in partitioned tables");
+ flagInhIndexes(fout, tblinfo, numTables);
+ }
pg_log_info("reading extended statistics");
getExtendedStatistics(fout);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 641bece12c7..ef8bd786371 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1090,7 +1090,7 @@ main(int argc, char **argv)
* Now scan the database and create DumpableObject structs for all
the
* objects we intend to dump.
*/
- tblinfo = getSchemaData(fout, &numTables);
+ tblinfo = getSchemaData(fout, &numTables, data_only);
if (dopt.dumpData)
{
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index fa6d1a510f7..83e097404a3 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -750,7 +750,7 @@ typedef struct _SubRelInfo
* common utility functions
*/
-extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr);
+extern TableInfo *getSchemaData(Archive *fout, int *numTablesPtr, bool
dataOnly);
extern void AssignDumpId(DumpableObject *dobj);
extern void recordAdditionalCatalogID(CatalogId catId, DumpableObject
*dobj);
we have:
time pg_dump --data-only test > test.sql
real 7m45,533s
user 0m0,726s
sys 0m0,634s
In the real case of system indexes corruption ... dump can take enourmous
amount of time.
From | Date | Subject | |
---|---|---|---|
Next Message | Andrew Bille | 2025-10-14 08:15:36 | Re: BUG #19086: pg_dump --data-only selects and do not uses index definitions for the dumped tables. |
Previous Message | Hayato Kuroda (Fujitsu) | 2025-10-14 06:53:00 | RE: Logical replication fails when partition column order differs from parent |