BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: exclusion(at)gmail(dot)com
Subject: BUG #17255: Server crashes in index_delete_sort_cmp() due to race condition with vacuum
Date: 2021-10-29 07:00:01
Message-ID: 17255-14c0ac58d0f9b583@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 17255
Logged by: Alexander Lakhin
Email address: exclusion(at)gmail(dot)com
PostgreSQL version: 14.0
Operating system: Ubuntu 20.04
Description:

The following scenario:
###
createdb regression
export PGDATABASE=regression

echo "
-- excerpt from inherit.sql
CREATE TABLE errtst_parent (
partid int not null,
shdata int not null,
data int NOT NULL DEFAULT 0,
CONSTRAINT shdata_small CHECK(shdata < 3)
) PARTITION BY RANGE (partid);

CREATE TABLE errtst_child_plaindef (
partid int not null,
shdata int not null,
data int NOT NULL DEFAULT 0,
CONSTRAINT shdata_small CHECK(shdata < 3),
CHECK(data < 10)
);

CREATE TABLE errtst_child_reorder (
data int NOT NULL DEFAULT 0,
shdata int not null,
partid int not null,
CONSTRAINT shdata_small CHECK(shdata < 3),
CHECK(data < 10)
);

ALTER TABLE errtst_parent ATTACH PARTITION errtst_child_plaindef FOR VALUES
FROM (10) TO (20);
ALTER TABLE errtst_parent ATTACH PARTITION errtst_child_reorder FOR VALUES
FROM (20) TO (30);

DROP TABLE errtst_parent;
" >/tmp/mini-inherit.sql

echo "
-- excerpt from vacuum.sql
CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);
SELECT pg_sleep(random()/500);
CREATE INDEX tmp_idx1 ON tmp (a);
" >/tmp/mini-vacuum.sql

echo "
VACUUM (skip_locked, index_cleanup off) pg_catalog.pg_class;
SELECT pg_sleep(random()/500);
" >/tmp/pseudo-autovacuum.sql

pgbench -n -f /tmp/mini-vacuum.sql -f /tmp/pseudo-autovacuum.sql -C -c 40 -T 600 >/dev/null 2>&1 &
pgbench -n -f /tmp/mini-inherit.sql -C -c 1 -T 600 >/dev/null 2>&1 &

wait
###
with the settings:
autovacuum=off
fsync=off
in postgresql.conf

causes a server crash:
TIME PID UID GID SIG COREFILE EXE
Fri 2021-10-29 09:37:09 MSK 383805 1000 1000 6 present
.../usr/local/pgsql/bin/postgres

real 3m20,335s
user 0m7,245s
sys 0m8,306s

with the following stack:
Core was generated by `postgres: law regression [local] CREATE INDEX
'.
Program terminated with signal SIGABRT, Aborted.
#0 __GI_raise (sig=sig(at)entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig(at)entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1 0x00007f8a7f97a859 in __GI_abort () at abort.c:79
#2 0x0000562dabb49700 in index_delete_sort_cmp (deltid2=<synthetic
pointer>, deltid1=<optimized out>) at heapam.c:7582
#3 index_delete_sort (delstate=0x7fff6f609f10, delstate=0x7fff6f609f10) at
heapam.c:7623
#4 heap_index_delete_tuples (rel=0x7f8a76523e08, delstate=0x7fff6f609f10)
at heapam.c:7296
#5 0x0000562dabc5519a in table_index_delete_tuples
(delstate=0x7fff6f609f10, rel=0x562dac23d6c2)
at ../../../../src/include/access/tableam.h:1327
#6 _bt_delitems_delete_check (rel=rel(at)entry=0x7f8a7652cc80,
buf=buf(at)entry=191, heapRel=heapRel(at)entry=0x7f8a76523e08,
delstate=delstate(at)entry=0x7fff6f609f10) at nbtpage.c:1541
#7 0x0000562dabc4dbe1 in _bt_simpledel_pass (maxoff=<optimized out>,
minoff=<optimized out>, newitem=<optimized out>,
ndeletable=55, deletable=0x7fff6f609f30, heapRel=0x7f8a76523e08,
buffer=191, rel=0x7f8a7652cc80)
at nbtinsert.c:2899
#8 _bt_delete_or_dedup_one_page (rel=0x7f8a7652cc80,
heapRel=0x7f8a76523e08, insertstate=0x7fff6f60a340,
simpleonly=<optimized out>, checkingunique=<optimized out>,
uniquedup=<optimized out>, indexUnchanged=false)
at nbtinsert.c:2712
#9 0x0000562dabc523f3 in _bt_findinsertloc (heapRel=0x7f8a76523e08,
stack=0x562dad8c1320, indexUnchanged=false,
checkingunique=true, insertstate=0x7fff6f60a340, rel=0x7f8a7652cc80) at
nbtinsert.c:904
#10 _bt_doinsert (rel=rel(at)entry=0x7f8a7652cc80,
itup=itup(at)entry=0x562dad8b9f20,
checkUnique=checkUnique(at)entry=UNIQUE_CHECK_YES,
indexUnchanged=indexUnchanged(at)entry=false,
heapRel=heapRel(at)entry=0x7f8a76523e08) at nbtinsert.c:255
#11 0x0000562dabc58451 in btinsert (rel=0x7f8a7652cc80, values=<optimized
out>, isnull=<optimized out>,
ht_ctid=0x562dad81f7d4, heapRel=0x7f8a76523e08,
checkUnique=UNIQUE_CHECK_YES, indexUnchanged=false,
indexInfo=0x562dad8b9ad8) at nbtree.c:199
#12 0x0000562dabcc0f14 in CatalogIndexInsert
(indstate=indstate(at)entry=0x562dad8c0fb0,
heapTuple=heapTuple(at)entry=0x562dad81f7d0) at indexing.c:158
#13 0x0000562dabcc119f in CatalogTupleInsert (heapRel=0x7f8a76523e08,
tup=0x562dad81f7d0) at indexing.c:231
#14 0x0000562dabcb92a6 in InsertPgClassTuple
(pg_class_desc=pg_class_desc(at)entry=0x7f8a76523e08,
new_rel_desc=new_rel_desc(at)entry=0x7f8a7654cd28, new_rel_oid=<optimized
out>, relacl=relacl(at)entry=0,
reloptions=reloptions(at)entry=0) at heap.c:986
#15 0x0000562dabcbec8d in index_create
(heapRelation=heapRelation(at)entry=0x7f8a7654c6a0,
indexRelationName=indexRelationName(at)entry=0x562dad7f7ff8 "tmp_idx1",
indexRelationId=571743,
indexRelationId(at)entry=0, parentIndexRelid=parentIndexRelid(at)entry=0,
parentConstraintId=parentConstraintId(at)entry=0,
relFileNode=<optimized out>, indexInfo=<optimized out>,
indexColNames=<optimized out>,
accessMethodObjectId=<optimized out>, tableSpaceId=<optimized out>,
collationObjectId=<optimized out>,
classObjectId=<optimized out>, coloptions=<optimized out>,
reloptions=<optimized out>, flags=<optimized out>,
constr_flags=<optimized out>, allow_system_table_mods=<optimized out>,
is_internal=<optimized out>,
constraintId=<optimized out>) at index.c:968
#16 0x0000562dabd533f8 in DefineIndex (relationId=relationId(at)entry=571674,
stmt=stmt(at)entry=0x562dad7f8168,
indexRelationId=indexRelationId(at)entry=0,
parentIndexId=parentIndexId(at)entry=0,
parentConstraintId=parentConstraintId(at)entry=0,
is_alter_table=is_alter_table(at)entry=false, check_rights=true,
check_not_in_use=true, skip_build=false, quiet=false) at
indexcmds.c:1137
#17 0x0000562dabf41217 in ProcessUtilitySlow (pstate=0x562dad8c0c80,
pstmt=0x562dad7f8518,
queryString=0x562dad7f75e0 "CREATE INDEX tmp_idx1 ON tmp (a);",
context=PROCESS_UTILITY_TOPLEVEL, params=0x0,
queryEnv=0x0, qc=0x7fff6f60b2a0, dest=<optimized out>) at
utility.c:1534
#18 0x0000562dabf3fd23 in standard_ProcessUtility (pstmt=0x562dad7f8518,
queryString=0x562dad7f75e0 "CREATE INDEX tmp_idx1 ON tmp (a);",
readOnlyTree=<optimized out>,
context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0,
dest=0x562dad7f8608, qc=0x7fff6f60b2a0)
at utility.c:1066
#19 0x0000562dabf3e3f1 in PortalRunUtility
(portal=portal(at)entry=0x562dad85b040, pstmt=pstmt(at)entry=0x562dad7f8518,
isTopLevel=isTopLevel(at)entry=true,
setHoldSnapshot=setHoldSnapshot(at)entry=false, dest=dest(at)entry=0x562dad7f8608,

qc=qc(at)entry=0x7fff6f60b2a0) at pquery.c:1155
#20 0x0000562dabf3e52d in PortalRunMulti
(portal=portal(at)entry=0x562dad85b040, isTopLevel=isTopLevel(at)entry=true,
setHoldSnapshot=setHoldSnapshot(at)entry=false,
dest=dest(at)entry=0x562dad7f8608, altdest=altdest(at)entry=0x562dad7f8608,
qc=qc(at)entry=0x7fff6f60b2a0) at pquery.c:1312
#21 0x0000562dabf3ebc9 in PortalRun (portal=portal(at)entry=0x562dad85b040,
count=count(at)entry=9223372036854775807,
isTopLevel=isTopLevel(at)entry=true, run_once=run_once(at)entry=true,
dest=dest(at)entry=0x562dad7f8608,
altdest=altdest(at)entry=0x562dad7f8608, qc=0x7fff6f60b2a0) at
pquery.c:788
#22 0x0000562dabf3a93b in exec_simple_query (query_string=0x562dad7f75e0
"CREATE INDEX tmp_idx1 ON tmp (a);")
at postgres.c:1214
#23 0x0000562dabf3c541 in PostgresMain (argc=argc(at)entry=1,
argv=argv(at)entry=0x7fff6f60b710, dbname=<optimized out>,
username=<optimized out>) at postgres.c:4486
#24 0x0000562dabea84bd in BackendRun (port=0x562dad81be40,
port=0x562dad81be40) at postmaster.c:4506
#25 BackendStartup (port=0x562dad81be40) at postmaster.c:4228
#26 ServerLoop () at postmaster.c:1745
#27 0x0000562dabea9461 in PostmasterMain (argc=<optimized out>,
argv=<optimized out>) at postmaster.c:1417
#28 0x0000562dabbd72e2 in main (argc=3, argv=0x562dad7f18a0) at main.c:209

Discovered while hunting for another bug related to autovacuum (unfortunately,
I still can't produce a reliable reproducing script for that).

Best regards,
Alexander

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Kamigishi Rei 2021-10-29 07:55:17 Re: BUG #17245: Index corruption involving deduplicated entries
Previous Message Andrey Borodin 2021-10-29 06:39:34 Re: conchuela timeouts since 2021-10-09 system upgrade