Re: crash 11.5~ (and 11.4)

From: Justin Pryzby <pryzby(at)telsasoft(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Subject: Re: crash 11.5~ (and 11.4)
Date: 2019-08-07 19:29:28
Message-ID: 20190807192928.GE11185@telsasoft.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Just found this, although I'm not sure what to do about it. If it's corrupt
table data, I can restore from backup.

ts=# VACUUM FREEZE VERBOSE child.huawei_umts_ucell_201908;
INFO: 00000: aggressively vacuuming "child.huawei_umts_ucell_201908"
LOCATION: lazy_scan_heap, vacuumlazy.c:502
ERROR: XX001: found xmin 73850277 from before relfrozenxid 111408920
LOCATION: heap_prepare_freeze_tuple, heapam.c:6853

I confirmed I updated to 11.4 immediately on its release:
[pryzbyj(at)database ~]$ ls -ltc /usr/pgsql-11/bin/postgres
-rwxr-xr-x. 1 root root 7291736 Jun 20 07:09 /usr/pgsql-11/bin/postgres

That table would've been created shortly after midnight on Aug 1 when we loaded
first data for the month. So it was created and processed only on pg11.4,
although the parent has probably been around since pg_upgrade last October.

Here's usable looking bt

#0 heap_prepare_freeze_tuple (tuple=0x7fd3d8fa0058, relfrozenxid=111408920,
relminmxid=1846, cutoff_xid=111658731, cutoff_multi=1846, frz=0x223e930,
totally_frozen_p=0x7ffff5f3848f) at heapam.c:6832
changed = 211
xmax_already_frozen = 127
xmin_frozen = false
freeze_xmax = false
xid = 0
__func__ = "heap_prepare_freeze_tuple"

#1 0x00000000006c8e6e in lazy_scan_heap (onerel=0x7fd37e4f8fa0, options=13,
vacrelstats=0x223e790, Irel=0x223e8d8, nindexes=3, aggressive=true)
at vacuumlazy.c:1151
tuple_totally_frozen = false
itemid = 0x7fd3d8f9e918
buf = 168313
page = 0x7fd3d8f9e900 "\020:"
offnum = 1
maxoff = 3
hastup = true
nfrozen = 0
freespace = 35907664
all_frozen = true
tupgone = false
prev_dead_count = 0
all_visible_according_to_vm = true
all_visible = true
has_dead_tuples = false
visibility_cutoff_xid = 73850277
nblocks = 75494
blkno = 34915
tuple = {t_len = 2212, t_self = {ip_blkid = {bi_hi = 0,
bi_lo = 34915}, ip_posid = 1}, t_tableOid = 2015128626,
t_data = 0x7fd3d8fa0058}
relname = 0x7fd37e4f6ac8 "huawei_umts_ucell_201908"
relfrozenxid = 111408920
relminmxid = 1846
empty_pages = 0
vacuumed_pages = 0
next_fsm_block_to_vacuum = 0
num_tuples = 1
live_tuples = 1
tups_vacuumed = 0
nkeep = 0
nunused = 0
indstats = 0x223e850
i = 32512
ru0 = {tv = {tv_sec = 1565204981, tv_usec = 609217}, ru = {ru_utime = {
tv_sec = 13, tv_usec = 865892}, ru_stime = {tv_sec = 1,
tv_usec = 960701}, ru_maxrss = 136988, ru_ixrss = 0,
ru_idrss = 0, ru_isrss = 0, ru_minflt = 48841, ru_majflt = 1,
ru_nswap = 0, ru_inblock = 196152, ru_oublock = 292656,
ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 849,
ru_nivcsw = 6002}}
vmbuffer = 4267
next_unskippable_block = 34916
skipping_blocks = false
frozen = 0x223e930
buf = {data = 0x0, len = -168590068, maxlen = 32767,
cursor = -168589904}
initprog_index = {0, 1, 5}
initprog_val = {1, 75494, 21968754}
__func__ = "lazy_scan_heap"
#2 0x00000000006c77c7 in lazy_vacuum_rel (onerel=0x7fd37e4f8fa0, options=13,
params=0x7ffff5f38a70, bstrategy=0x22b8cf8) at vacuumlazy.c:265
vacrelstats = 0x223e790
Irel = 0x223e8d8
nindexes = 3
ru0 = {tv = {tv_sec = 140733193388036, tv_usec = 281474985219646},
ru = {ru_utime = {tv_sec = 12933152, tv_usec = 35123496},
ru_stime = {tv_sec = 140737319765996, tv_usec = 4687312},
ru_maxrss = 0, ru_ixrss = -1887226286461968708,
ru_idrss = 35351240, ru_isrss = 35249392,
ru_minflt = 140737319765776, ru_majflt = 35249392, ru_nswap = 20,
ru_inblock = 0, ru_oublock = 140737319765936,
ru_msgsnd = 10990471, ru_msgrcv = 140546333970336,
ru_nsignals = 35249264, ru_nvcsw = 4, ru_nivcsw = 4687312}}
starttime = 0
secs = 72057594037927936
usecs = 2015128626
read_rate = 6.9533474784178538e-310
write_rate = 6.9439115263673584e-310
aggressive = true
scanned_all_unfrozen = false
xidFullScanLimit = 111658735
mxactFullScanLimit = 1846
new_rel_pages = 0
new_rel_allvisible = 36424984
new_live_tuples = 6.9533474784194348e-310
new_frozen_xid = 9080197
new_min_multi = 0
__func__ = "lazy_vacuum_rel"
#3 0x00000000006c72b3 in vacuum_rel (relid=2015128626, relation=0x2161820,
options=13, params=0x7ffff5f38a70) at vacuum.c:1557
lmode = 4
onerel = 0x7fd37e4f8fa0
onerelid = {relId = 2015128626, dbId = 16564}
toast_relid = 2015128673
save_userid = 18712
save_sec_context = 0
save_nestlevel = 2
rel_lock = true
__func__ = "vacuum_rel"
#4 0x00000000006c5939 in vacuum (options=13, relations=0x22b8e70,
params=0x7ffff5f38a70, bstrategy=0x22b8cf8, isTopLevel=true)
at vacuum.c:340
vrel = 0x22b8e10
cur = 0x22b8e48
save_exception_stack = 0x7ffff5f38c20
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {35002584, 9176610699604406608,
4687312, 140737319770032, 0, 0, 9176610699520520528,
-9176587885370396336}, __mask_was_saved = 0, __saved_mask = {
__val = {35906192, 140737319766400, 10964897, 6853, 35905936,
208, 35890568, 21474836680, 35890776, 140737319766560,
10927612, 208, 35890288, 35890288, 35890504, 35890544}}}}
in_vacuum = true
stmttype = 0xc03841 "VACUUM"
in_outer_xact = false
use_own_xacts = true
__func__ = "vacuum"
#5 0x00000000006c54e0 in ExecVacuum (vacstmt=0x2161910, isTopLevel=true)
at vacuum.c:141
params = {freeze_min_age = 0, freeze_table_age = 0,
multixact_freeze_min_age = 0, multixact_freeze_table_age = 0,
is_wraparound = false, log_min_duration = -1}
__func__ = "ExecVacuum"

errfinish one seems to be not useful?

(gdb) bt
#0 errfinish (dummy=0) at elog.c:415
#1 0x000000000054aefd in ShowTransactionStateRec (
str=0xafed12 "StartTransaction", s=0xf9efa0) at xact.c:5157
#2 0x000000000054ad5c in ShowTransactionState (
str=0xafed12 "StartTransaction") at xact.c:5130
#3 0x00000000005470c8 in StartTransaction () at xact.c:1961
#4 0x0000000000547db9 in StartTransactionCommand () at xact.c:2734
#5 0x00000000008cbe65 in start_xact_command () at postgres.c:2500
#6 0x00000000008c9772 in exec_simple_query (
query_string=0x2160d28 "VACUUM FREEZE VERBOSE child.huawei_umts_ucell_201908;") at postgres.c:948
#7 0x00000000008cddf3 in PostgresMain (argc=1, argv=0x218c768,
dbname=0x218c5d0 "ts", username=0x218c5b0 "pryzbyj") at postgres.c:4182
#8 0x000000000082a098 in BackendRun (port=0x21818d0) at postmaster.c:4358
#9 0x0000000000829806 in BackendStartup (port=0x21818d0) at postmaster.c:4030
#10 0x0000000000825cab in ServerLoop () at postmaster.c:1707
#11 0x00000000008255dd in PostmasterMain (argc=3, argv=0x215b9a0)
at postmaster.c:1380
#12 0x000000000074ba30 in main (argc=3, argv=0x215b9a0) at main.c:228

#0 errfinish (dummy=0) at elog.c:415
edata = 0xfe7b80
elevel = 0
oldcontext = 0x215df90
econtext = 0xa4d11a
__func__ = "errfinish"
#1 0x000000000054aefd in ShowTransactionStateRec (
str=0xafed12 "StartTransaction", s=0xf9efa0) at xact.c:5157
buf = {data = 0x22462d8 "", len = 0, maxlen = 1024, cursor = 0}
__func__ = "ShowTransactionStateRec"
#2 0x000000000054ad5c in ShowTransactionState (
str=0xafed12 "StartTransaction") at xact.c:5130
No locals.
#3 0x00000000005470c8 in StartTransaction () at xact.c:1961
s = 0xf9efa0
vxid = {backendId = 5, localTransactionId = 16}
#4 0x0000000000547db9 in StartTransactionCommand () at xact.c:2734
s = 0xf9efa0
__func__ = "StartTransactionCommand"
#5 0x00000000008cbe65 in start_xact_command () at postgres.c:2500
No locals.

I looked at this briefly:

ts=# SELECT * FROM page_header(get_raw_page('child.huawei_umts_ucell_201908', 0));
lsn | 3A49/66F44310
checksum | 0
flags | 4
lower | 36
upper | 1432
special | 8192
pagesize | 8192
version | 4
prune_xid | 0

In case someone wants me to look at bits and pieces, I saved a copy of:
relfilenode | 2015128626

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Ibrar Ahmed 2019-08-07 19:57:02 Re: initdb: Use varargs macro for PG_CMD_PRINTF
Previous Message Ashwin Agrawal 2019-08-07 18:53:39 Re: Remove HeapTuple and Buffer dependency for predicate locking functions