BUG #16331: segfault in checkpointer with full disk

From: PG Bug reporting form <noreply(at)postgresql(dot)org>
To: pgsql-bugs(at)lists(dot)postgresql(dot)org
Cc: jmlich83(at)gmail(dot)com
Subject: BUG #16331: segfault in checkpointer with full disk
Date: 2020-04-01 08:51:56
Message-ID: 16331-16d6e966536b6390@postgresql.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

The following bug has been logged on the website:

Bug reference: 16331
Logged by: Jozef Mlich
Email address: jmlich83(at)gmail(dot)com
PostgreSQL version: 12.2
Operating system: CentOS
Description:

I can see segfaults on CentOS 7 with postgresql 12.2-2PGDG.rhel7 (from
yum.postgresql.org). I am using multiple extensions (cstore, postgres_fdw,
pgcrypto,dblink, etc.). It seems crash is related to disk run out of space
(I am using separate partion for / and for /var/lib/pgsql). It occurs few
times a day. According to backtrace it seems to be related to checkpointer.
Replication is not configured.

[New LWP 26290]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `postgres: checkpointer
'.
Program terminated with signal 6, Aborted.
#0 0x00007fe4604c1207 in __GI_raise (sig=sig(at)entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:55
55 return INLINE_SYSCALL (tgkill, 3, pid, selftid, sig);

Thread 1 (Thread 0x7fe462e148c0 (LWP 26290)):
#0 0x00007fe4604c1207 in __GI_raise (sig=sig(at)entry=6) at
../nptl/sysdeps/unix/sysv/linux/raise.c:55
resultvar = 0
pid = 26290
selftid = 26290
#1 0x00007fe4604c28f8 in __GI_abort () at abort.c:90
save_stage = 2
act = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0},
sa_mask = {__val = {0, 0, 0, 0, 0, 9268713, 70403103920717,
39808819211026438, 20126216749056, 70394513997832, 9268713, 70403103920719,
17316096998686159616, 20134806683648, 140618848608704, 140618848592800}},
sa_flags = 1615828275, sa_restorer = 0x0}
sigs = {__val = {32, 0 <repeats 15 times>}}
#2 0x000000000087840a in errfinish (dummy=<optimized out>) at elog.c:552
edata = 0xd47040 <errordata>
elevel = 22
oldcontext = 0x171a6d0
econtext = 0x0
__func__ = "errfinish"
#3 0x0000000000706b24 in CheckPointReplicationOrigin () at origin.c:562
tmppath = 0x9e6fa8 "pg_logical/replorigin_checkpoint.tmp"
path = 0x9e6fd0 "pg_logical/replorigin_checkpoint"
tmpfd = <optimized out>
i = <optimized out>
magic = 307747550
crc = 4294967295
__func__ = "CheckPointReplicationOrigin"
#4 0x0000000000509766 in CheckPointGuts (checkPointRedo=243199514360,
flags=flags(at)entry=256) at xlog.c:9007
No locals.
#5 0x0000000000510106 in CreateCheckPoint (flags=flags(at)entry=256) at
xlog.c:8795
shutdown = false
checkPoint = {redo = 243199514360, ThisTimeLineID = 1,
PrevTimeLineID = 1, fullPageWrites = true, nextFullXid = {value = 20251117},
nextOid = 9645082, nextMulti = 3, nextMultiOffset = 5, oldestXid = 479,
oldestXidDB = 14186, oldestMulti = 1, oldestMultiDB = 1, time = 1585692001,
oldestCommitTsXid = 0, newestCommitTsXid = 0, oldestActiveXid = 20251117}
recptr = <optimized out>
_logSegNo = 0
Insert = <optimized out>
freespace = <optimized out>
PriorRedoPtr = <optimized out>
curInsert = 243199514360
last_important_lsn = <optimized out>
vxids = 0x17389d8
nvxids = 0
__func__ = "CreateCheckPoint"
#6 0x00000000006e4982 in CheckpointerMain () at checkpointer.c:481
ckpt_performed = false
do_restartpoint = <optimized out>
flags = 256
do_checkpoint = <optimized out>
now = 1585692001
elapsed_secs = <optimized out>
cur_timeout = <optimized out>
local_sigjmp_buf = {{__jmpbuf = {140732342117584,
-1758805613090239410, 2, 9211212, 0, 24183888, -1758805613174125490,
1759480033986153550}, __mask_was_saved = 1, __saved_mask = {__val =
{18446744066192964103, 0 <repeats 15 times>}}}}
checkpointer_context = 0x171a6d0
__func__ = "CheckpointerMain"
#7 0x000000000051fe75 in AuxiliaryProcessMain (argc=argc(at)entry=2,
argv=argv(at)entry=0x7ffecd42a480) at bootstrap.c:461
progname = 0x8c8d4c "postgres"
flag = <optimized out>
userDoption = 0x0
__func__ = "AuxiliaryProcessMain"
#8 0x00000000006eef70 in StartChildProcess (type=CheckpointerProcess) at
postmaster.c:5392
pid = <optimized out>
av = {0x8c8d4c "postgres", 0x7ffecd42a4d0 "-x4", 0x0, 0x0, 0x16c92e0
"postgres-12", 0x5 <Address 0x5 out of bounds>, 0x7ffecd42a400 "L\215\214",
0x6 <Address 0x6 out of bounds>, 0x0, 0x7fe33f1daff8 "@NL`\344\177"}
ac = 2
typebuf =
"-x4\000\376\177\000\000\000s\345\306y\"O\360\001\000\000\000]\201\240J\000\000\000\000\000\000\000"
#9 0x00000000006f02ba in reaper (postgres_signal_arg=<optimized out>) at
postmaster.c:2973
save_errno = 4
pid = <optimized out>
exitstatus = 0
__func__ = "reaper"
#10 <signal handler called>
No locals.
#11 0x00007fe46057ff53 in __select_nocancel () at
../sysdeps/unix/syscall-template.S:81
No locals.
#12 0x0000000000483484 in ServerLoop () at postmaster.c:1668
timeout = {tv_sec = 59, tv_usec = 872935}
rmask = {fds_bits = {120, 0 <repeats 15 times>}}
selres = <optimized out>
now = <optimized out>
readmask = {fds_bits = {120, 0 <repeats 15 times>}}
last_lockfile_recheck_time = 1585685433
last_touch_time = 1585685433
__func__ = "ServerLoop"
#13 0x00000000006f195f in PostmasterMain (argc=argc(at)entry=3,
argv=argv(at)entry=0x16c9300) at postmaster.c:1377
opt = <optimized out>
status = <optimized out>
userDoption = <optimized out>
listen_addr_saved = true
i = <optimized out>
output_config_variable = <optimized out>
__func__ = "PostmasterMain"
#14 0x0000000000484dd3 in main (argc=3, argv=0x16c9300) at main.c:228
No locals.

Please let me know if I can provide some more information.

best regards,
Jozef Mlich

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message Julien Rouhaud 2020-04-01 09:04:55 Re: BUG #16331: segfault in checkpointer with full disk
Previous Message Fujii Masao 2020-04-01 08:27:22 Re: [BUG] non archived WAL removed during production crash recovery