Re: pg11.1: dsa_area could not attach to segment

From: Justin Pryzby <pryzby(at)telsasoft(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Cc: Thomas Munro <thomas(dot)munro(at)enterprisedb(dot)com>
Subject: Re: pg11.1: dsa_area could not attach to segment
Date: 2019-02-05 16:35:09
Message-ID: 20190205163509.GM29720@telsasoft.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

I finally reproduced this with a core dump.

For some reason I needed to write assert() rather than elog(PANIC); otherwise
it failed with ERROR and produced no core dump.

@@ -1741,4 +1743,5 @@ get_segment_by_index(dsa_area *area, dsa_segment_index index)
segment = dsm_attach(handle);
+ assert (segment != NULL);
if (segment == NULL)
- elog(ERROR, "dsa_area could not attach to segment");
+ elog(PANIC, "dsa_area could not attach to segment");
if (area->mapping_pinned)

On Mon, Dec 03, 2018 at 11:45:00AM +1300, Thomas Munro wrote:
> If anyone can reproduce this problem with a debugger, it'd be
> interesting to see the output of dsa_dump(area), and
> FreePageManagerDump(segment_map->fpm).

Looks like this will take some work; is it OK if I make a core dump available to
you? I'm not sure how sensitive it is to (re)compilation, but I'm using PG11.1
compiled locally on CentOS 6.

/var/log/postgresql/postgresql-2019-02-05_111730.log-< 2019-02-05 11:17:31.372 EST >LOG: background worker "parallel worker" (PID 17110) was terminated by signal 6: Aborted
/var/log/postgresql/postgresql-2019-02-05_111730.log:< 2019-02-05 11:17:31.372 EST >DETAIL: Failed process was running: SELECT colcld.child c, parent p, array_agg(colpar.attname::text ORDER BY colpar.attnum) cols, array_agg(format_type(colpar.atttypid, colpar.atttypmod) ORDER BY colpar.attnum) AS types FROM queued_alters qa JOIN pg_attribute colpar ON to_regclass(qa.parent)=colpar.attrelid AND colpar.attnum>0 AND NOT colpar.attisdropped JOIN (SELECT *, attrelid::regclass::text AS child FROM pg_attribute) colcld ON to_regclass(qa.child) =colcld.attrelid AND colcld.attnum>0 AND NOT colcld.attisdropped WHERE colcld.attname=colpar.attname AND colpar.atttypid!=colcld.atttypid GROUP BY 1,2 ORDER BY parent LIKE 'unused%', regexp_replace(colcld.child, '.*_((([0-9]{4}_[0-9]{2})_[0-9]{2})|(([0-9]{6})([0-9]{2})?))$', '\3\5') DESC, regexp_replace(colcld.child, '.*_', '') DESC LIMIT 1

(gdb) bt
#0 0x00000037b9c32495 in raise () from /lib64/libc.so.6
#1 0x00000037b9c33c75 in abort () from /lib64/libc.so.6
#2 0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
#3 0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
#4 0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value optimized out>) at dsa.c:1744
#5 0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at dsa.c:1995
#6 0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, flags=0) at dsa.c:703
#7 0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, size=104, shared=0x7ffc6b5cfc48) at nodeHash.c:2837
#8 0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, slot=<value optimized out>, hashvalue=423104953) at nodeHash.c:1693
#9 0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at nodeHash.c:288
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at nodeHashjoin.c:290
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
#13 0x0000000000638ce0 in ExecProcNodeInstr (node=0x2793038) at execProcnode.c:461
#14 0x00000000006349c7 in ExecProcNode (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56) at ../../../src/include/executor/executor.h:237
#15 ExecutePlan (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56) at execMain.c:1723
#16 standard_ExecutorRun (queryDesc=0x2782cd0, direction=<value optimized out>, count=0, execute_once=56) at execMain.c:364
#17 0x00007f84a97c8618 in pgss_ExecutorRun (queryDesc=0x2782cd0, direction=ForwardScanDirection, count=0, execute_once=true) at pg_stat_statements.c:892
#18 0x00007f84a93357dd in explain_ExecutorRun (queryDesc=0x2782cd0, direction=ForwardScanDirection, count=0, execute_once=true) at auto_explain.c:268
#19 0x0000000000635071 in ParallelQueryMain (seg=0x268fba8, toc=0x7f84a9578000) at execParallel.c:1402
#20 0x0000000000508f34 in ParallelWorkerMain (main_arg=<value optimized out>) at parallel.c:1409
#21 0x0000000000704760 in StartBackgroundWorker () at bgworker.c:834
#22 0x000000000070e11c in do_start_bgworker () at postmaster.c:5698
#23 maybe_start_bgworkers () at postmaster.c:5911
#24 0x0000000000710786 in sigusr1_handler (postgres_signal_arg=<value optimized out>) at postmaster.c:5091
#25 <signal handler called>
#26 0x00000037b9ce1603 in __select_nocancel () from /lib64/libc.so.6
#27 0x000000000071300e in ServerLoop (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1670
#28 PostmasterMain (argc=<value optimized out>, argv=<value optimized out>) at postmaster.c:1379
#29 0x000000000067e8c0 in main (argc=3, argv=0x265f960) at main.c:228

#0 0x00000037b9c32495 in raise () from /lib64/libc.so.6
No symbol table info available.
#1 0x00000037b9c33c75 in abort () from /lib64/libc.so.6
No symbol table info available.
#2 0x00000037b9c2b60e in __assert_fail_base () from /lib64/libc.so.6
No symbol table info available.
#3 0x00000037b9c2b6d0 in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#4 0x00000000008c4a72 in get_segment_by_index (area=0x2788440, index=<value optimized out>) at dsa.c:1744
handle = <value optimized out>
segment = 0x0
segment_map = <value optimized out>
__func__ = "get_segment_by_index"
__PRETTY_FUNCTION__ = "get_segment_by_index"
#5 0x00000000008c58e9 in get_best_segment (area=0x2788440, npages=8) at dsa.c:1995
segment_map = <value optimized out>
next_segment_index = <value optimized out>
contiguous_pages = <value optimized out>
threshold = 512
segment_index = 10
bin = <value optimized out>
#6 0x00000000008c6c99 in dsa_allocate_extended (area=0x2788440, size=32768, flags=0) at dsa.c:703
npages = 8
first_page = <value optimized out>
span_pointer = 8796097199728
pool = 0x7f84a9579730
size_class = <value optimized out>
start_pointer = <value optimized out>
segment_map = <value optimized out>
result = 140207753496128
__func__ = "dsa_allocate_extended"
__PRETTY_FUNCTION__ = "dsa_allocate_extended"
#7 0x000000000064c6fe in ExecParallelHashTupleAlloc (hashtable=0x27affb0, size=104, shared=0x7ffc6b5cfc48) at nodeHash.c:2837
pstate = 0x7f84a9578540
chunk_shared = <value optimized out>
chunk = <value optimized out>
chunk_size = 32768
result = <value optimized out>
curbatch = 0
#8 0x000000000064cb92 in ExecParallelHashTableInsert (hashtable=0x27affb0, slot=<value optimized out>, hashvalue=423104953) at nodeHash.c:1693
hashTuple = <value optimized out>
tuple = 0x27b00c8
shared = <value optimized out>
bucketno = 1577401
batchno = 0
#9 0x000000000064cf17 in MultiExecParallelHash (node=0x27a1ed8) at nodeHash.c:288
outerNode = 0x27a1ff0
hashkeys = 0x27af110
slot = 0x27a3d70
econtext = 0x27a3798
hashvalue = 423104953
i = <value optimized out>
pstate = 0x7f84a9578540
hashtable = 0x27affb0
build_barrier = 0x7f84a9578590
#10 MultiExecHash (node=0x27a1ed8) at nodeHash.c:112
No locals.
#11 0x000000000064e1f8 in ExecHashJoinImpl (pstate=0x2793038) at nodeHashjoin.c:290
outerNode = 0x2792f20
hashNode = 0x27a1ed8
econtext = 0x2792c68
outerTupleSlot = 0x1
node = 0x2793038
joinqual = 0x27ac270
otherqual = 0x0
hashtable = 0x27affb0
hashvalue = 0
batchno = 41493896
parallel_state = 0x7f84a9578540
#12 ExecParallelHashJoin (pstate=0x2793038) at nodeHashjoin.c:581
No locals.

Justin

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Bruce Momjian 2019-02-05 16:35:58 Re: Commit Fest 2019-01 is now closed
Previous Message Tom Lane 2019-02-05 16:20:52 Re: Fix optimization of foreign-key on update actions